Bug Summary

File: lib/Target/X86/X86ISelLowering.cpp
Warning: line 6719, column 1
Potential leak of memory pointed to by 'UndefMask.X'

Annotated Source Code

[?] Use j/k keys for keyboard navigation

/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/TargetLowering.h"
39#include "llvm/CodeGen/WinEHFuncInfo.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/Constants.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/DiagnosticInfo.h"
45#include "llvm/IR/Function.h"
46#include "llvm/IR/GlobalAlias.h"
47#include "llvm/IR/GlobalVariable.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/MC/MCAsmInfo.h"
51#include "llvm/MC/MCContext.h"
52#include "llvm/MC/MCExpr.h"
53#include "llvm/MC/MCSymbol.h"
54#include "llvm/Support/CommandLine.h"
55#include "llvm/Support/Debug.h"
56#include "llvm/Support/ErrorHandling.h"
57#include "llvm/Support/KnownBits.h"
58#include "llvm/Support/MathExtras.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
66#define DEBUG_TYPE "x86-isel"
67// Tail-call counter. Per the analyzer, STATISTIC expands to: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls", {0}, false}
68STATISTIC(NumTailCalls, "Number of tail calls");
69
70// Hidden flag (default false): vector type legalization by widening.
71static cl::opt<bool> ExperimentalVectorWideningLegalization(
72 "x86-experimental-vector-widening-legalization", cl::Hidden,
73 cl::desc("Enable an experimental vector type legalization through "
74 "widening rather than promotion."), cl::init(false));
75
76// Hidden int option (default 4); per its description, that many trailing bits of the loop header PC will be 0.
77static cl::opt<int> ExperimentalPrefLoopAlignment(
78 "x86-experimental-pref-loop-alignment", cl::Hidden, cl::init(4),
79 cl::desc("Sets the preferable loop alignment for experiments (the "
80 "last x86-experimental-pref-loop-alignment bits of the loop "
81 "header PC will be 0)."));
82
83// Hidden flag (default true): replace multiplies by a constant, per the description below.
84static cl::opt<bool> MulConstantOptimization(
85 "mul-constant-optimization", cl::Hidden, cl::init(true),
86 cl::desc("Replace 'mul x, Const' with more effective instructions "
87 "like SHIFT, LEA, etc."));
88
89/// Emit a non-fatal "unsupported" diagnostic carrying \p Msg at \p dl, e.g.
90/// for returning a double on x86_64 without SSE2. Unlike report_fatal_error
91/// this does not abort, so callers should try to recover without crashing.
92/// NOTE(review): keep the diagnostic a temporary of the diagnose() call; it may hold Msg by reference - confirm before hoisting.
93static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
94 const char *Msg) {
95 LLVMContext &Ctx = *DAG.getContext();
96 Ctx.diagnose(DiagnosticInfoUnsupported(
97 DAG.getMachineFunction().getFunction(), Msg, dl.getDebugLoc()));
98}
99
100X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
101 const X86Subtarget &STI)
102 : TargetLowering(TM), Subtarget(STI) {
103 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
104 X86ScalarSSEf64 = Subtarget.hasSSE2();
105 X86ScalarSSEf32 = Subtarget.hasSSE1();
106 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
107
108 // Set up the TargetLowering object.
109
110 // X86 is weird. It always uses i8 for shift amounts and setcc results.
111 setBooleanContents(ZeroOrOneBooleanContent);
112 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
113 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
114
115 // For 64-bit, since we have so many registers, use the ILP scheduler.
116 // For 32-bit, use the register pressure specific scheduling.
117 // For Atom, always use ILP scheduling.
118 if (Subtarget.isAtom())
119 setSchedulingPreference(Sched::ILP);
120 else if (Subtarget.is64Bit())
121 setSchedulingPreference(Sched::ILP);
122 else
123 setSchedulingPreference(Sched::RegPressure);
124 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
125 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
126
127 // Bypass expensive divides and use cheaper ones.
128 if (TM.getOptLevel() >= CodeGenOpt::Default) {
129 if (Subtarget.hasSlowDivide32())
130 addBypassSlowDiv(32, 8);
131 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
132 addBypassSlowDiv(64, 32);
133 }
134
135 if (Subtarget.isTargetKnownWindowsMSVC() ||
136 Subtarget.isTargetWindowsItanium()) {
137 // Setup Windows compiler runtime calls.
138 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
139 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
140 setLibcallName(RTLIB::SREM_I64, "_allrem");
141 setLibcallName(RTLIB::UREM_I64, "_aullrem");
142 setLibcallName(RTLIB::MUL_I64, "_allmul");
143 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
144 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
145 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
146 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
147 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
148 }
149
150 if (Subtarget.isTargetDarwin()) {
151 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
152 setUseUnderscoreSetJmp(false);
153 setUseUnderscoreLongJmp(false);
154 } else if (Subtarget.isTargetWindowsGNU()) {
155 // MS runtime is weird: it exports _setjmp, but longjmp!
156 setUseUnderscoreSetJmp(true);
157 setUseUnderscoreLongJmp(false);
158 } else {
159 setUseUnderscoreSetJmp(true);
160 setUseUnderscoreLongJmp(true);
161 }
162
163 // Set up the register classes.
164 addRegisterClass(MVT::i8, &X86::GR8RegClass);
165 addRegisterClass(MVT::i16, &X86::GR16RegClass);
166 addRegisterClass(MVT::i32, &X86::GR32RegClass);
167 if (Subtarget.is64Bit())
168 addRegisterClass(MVT::i64, &X86::GR64RegClass);
169
170 for (MVT VT : MVT::integer_valuetypes())
171 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
172
173 // We don't accept any truncstore of integer registers.
174 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
175 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
176 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
177 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
178 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
179 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
180
181 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
182
183 // SETOEQ and SETUNE require checking two conditions.
184 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
185 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
186 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
187 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
188 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
189 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
190
191 // Integer absolute.
192 if (Subtarget.hasCMov()) {
193 setOperationAction(ISD::ABS , MVT::i16 , Custom);
194 setOperationAction(ISD::ABS , MVT::i32 , Custom);
195 if (Subtarget.is64Bit())
196 setOperationAction(ISD::ABS , MVT::i64 , Custom);
197 }
198
199 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
200 // operation.
201 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
202 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
203 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
204
205 if (Subtarget.is64Bit()) {
206 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
207 // f32/f64 are legal, f80 is custom.
208 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
209 else
210 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
211 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
212 } else if (!Subtarget.useSoftFloat()) {
213 // We have an algorithm for SSE2->double, and we turn this into a
214 // 64-bit FILD followed by conditional FADD for other targets.
215 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
216 // We have an algorithm for SSE2, and we turn this into a 64-bit
217 // FILD or VCVTUSI2SS/SD for other targets.
218 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
219 }
220
221 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
222 // this operation.
223 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
224 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
225
226 if (!Subtarget.useSoftFloat()) {
227 // SSE has no i16 to fp conversion, only i32.
228 if (X86ScalarSSEf32) {
229 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
230 // f32 and f64 cases are Legal, f80 case is not
231 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
232 } else {
233 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
234 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
235 }
236 } else {
237 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
238 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
239 }
240
241 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
242 // this operation.
243 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
244 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
245
246 if (!Subtarget.useSoftFloat()) {
247 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
248 // are Legal, f80 is custom lowered.
249 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
250 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
251
252 if (X86ScalarSSEf32) {
253 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
254 // f32 and f64 cases are Legal, f80 case is not
255 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
256 } else {
257 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
258 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
259 }
260 } else {
261 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
262 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
263 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
264 }
265
266 // Handle FP_TO_UINT by promoting the destination to a larger signed
267 // conversion.
268 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
269 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
270 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
271
272 if (Subtarget.is64Bit()) {
273 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
274 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
275 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
276 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
277 } else {
278 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
279 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
280 }
281 } else if (!Subtarget.useSoftFloat()) {
282 // Since AVX is a superset of SSE3, only check for SSE here.
283 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
284 // Expand FP_TO_UINT into a select.
285 // FIXME: We would like to use a Custom expander here eventually to do
286 // the optimal thing for SSE vs. the default expansion in the legalizer.
287 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
288 else
289 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
290 // With SSE3 we can use fisttpll to convert to a signed i64; without
291 // SSE, we're stuck with a fistpll.
292 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
293
294 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
295 }
296
297 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
298 if (!X86ScalarSSEf64) {
299 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
300 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
301 if (Subtarget.is64Bit()) {
302 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
303 // Without SSE, i64->f64 goes through memory.
304 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
305 }
306 } else if (!Subtarget.is64Bit())
307 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
308
309 // Scalar integer divide and remainder are lowered to use operations that
310 // produce two results, to match the available instructions. This exposes
311 // the two-result form to trivial CSE, which is able to combine x/y and x%y
312 // into a single instruction.
313 //
314 // Scalar integer multiply-high is also lowered to use two-result
315 // operations, to match the available instructions. However, plain multiply
316 // (low) operations are left as Legal, as there are single-result
317 // instructions for this in x86. Using the two-result multiply instructions
318 // when both high and low results are needed must be arranged by dagcombine.
319 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
320 setOperationAction(ISD::MULHS, VT, Expand);
321 setOperationAction(ISD::MULHU, VT, Expand);
322 setOperationAction(ISD::SDIV, VT, Expand);
323 setOperationAction(ISD::UDIV, VT, Expand);
324 setOperationAction(ISD::SREM, VT, Expand);
325 setOperationAction(ISD::UREM, VT, Expand);
326 }
327
328 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
329 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
330 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
331 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
332 setOperationAction(ISD::BR_CC, VT, Expand);
333 setOperationAction(ISD::SELECT_CC, VT, Expand);
334 }
335 if (Subtarget.is64Bit())
336 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
337 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
338 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
339 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
340 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
341
342 setOperationAction(ISD::FREM , MVT::f32 , Expand);
343 setOperationAction(ISD::FREM , MVT::f64 , Expand);
344 setOperationAction(ISD::FREM , MVT::f80 , Expand);
345 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
346
347 // Promote the i8 variants and force them on up to i32 which has a shorter
348 // encoding.
349 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
350 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
351 if (!Subtarget.hasBMI()) {
352 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
353 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
354 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
355 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
356 if (Subtarget.is64Bit()) {
357 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
358 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
359 }
360 }
361
362 if (Subtarget.hasLZCNT()) {
363 // When promoting the i8 variants, force them to i32 for a shorter
364 // encoding.
365 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
366 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
367 } else {
368 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
369 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
370 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
371 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
372 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
373 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
374 if (Subtarget.is64Bit()) {
375 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
376 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
377 }
378 }
379
380 // Special handling for half-precision floating point conversions.
381 // If we don't have F16C support, then lower half float conversions
382 // into library calls.
383 if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
384 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
385 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
386 }
387
388 // There's never any support for operations beyond MVT::f32.
389 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
390 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
391 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
392 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
393
394 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
395 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
396 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
397 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
398 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
399 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
400
401 if (Subtarget.hasPOPCNT()) {
402 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
403 } else {
404 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
405 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
406 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
407 if (Subtarget.is64Bit())
408 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
409 }
410
411 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
412
413 if (!Subtarget.hasMOVBE())
414 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
415
416 // These should be promoted to a larger select which is supported.
417 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
418 // X86 wants to expand cmov itself.
419 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
420 setOperationAction(ISD::SELECT, VT, Custom);
421 setOperationAction(ISD::SETCC, VT, Custom);
422 }
423 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
424 if (VT == MVT::i64 && !Subtarget.is64Bit())
425 continue;
426 setOperationAction(ISD::SELECT, VT, Custom);
427 setOperationAction(ISD::SETCC, VT, Custom);
428 }
429
430 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
431 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
432 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
433
434 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
435 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
436 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
437 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
438 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
439 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
440 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
441 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
442
443 // Darwin ABI issue.
444 for (auto VT : { MVT::i32, MVT::i64 }) {
445 if (VT == MVT::i64 && !Subtarget.is64Bit())
446 continue;
447 setOperationAction(ISD::ConstantPool , VT, Custom);
448 setOperationAction(ISD::JumpTable , VT, Custom);
449 setOperationAction(ISD::GlobalAddress , VT, Custom);
450 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
451 setOperationAction(ISD::ExternalSymbol , VT, Custom);
452 setOperationAction(ISD::BlockAddress , VT, Custom);
453 }
454
455 // 64-bit shl, sra, srl (iff 32-bit x86)
456 for (auto VT : { MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SHL_PARTS, VT, Custom);
460 setOperationAction(ISD::SRA_PARTS, VT, Custom);
461 setOperationAction(ISD::SRL_PARTS, VT, Custom);
462 }
463
464 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
465 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
466
467 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
468
469 // Expand certain atomics
470 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
471 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
477 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
478 }
479
480 if (Subtarget.hasCmpxchg16b()) {
481 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
482 }
483
484 // FIXME - use subtarget debug flags
485 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
486 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
487 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
488 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
489 }
490
491 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
492 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
493
494 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
495 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
496
497 setOperationAction(ISD::TRAP, MVT::Other, Legal);
498 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
499
500 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
501 setOperationAction(ISD::VASTART , MVT::Other, Custom);
502 setOperationAction(ISD::VAEND , MVT::Other, Expand);
503 bool Is64Bit = Subtarget.is64Bit();
504 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
505 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
506
507 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
508 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
509
510 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
511
512 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
513 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
514 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
515
516 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
517 // f32 and f64 use SSE.
518 // Set up the FP register classes.
519 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
520 : &X86::FR32RegClass);
521 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
522 : &X86::FR64RegClass);
523
524 for (auto VT : { MVT::f32, MVT::f64 }) {
525 // Use ANDPD to simulate FABS.
526 setOperationAction(ISD::FABS, VT, Custom);
527
528 // Use XORP to simulate FNEG.
529 setOperationAction(ISD::FNEG, VT, Custom);
530
531 // Use ANDPD and ORPD to simulate FCOPYSIGN.
532 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
533
534 // We don't support sin/cos/fmod
535 setOperationAction(ISD::FSIN , VT, Expand);
536 setOperationAction(ISD::FCOS , VT, Expand);
537 setOperationAction(ISD::FSINCOS, VT, Expand);
538 }
539
540 // Lower this to MOVMSK plus an AND.
541 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
542 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
543
544 // Expand FP immediates into loads from the stack, except for the special
545 // cases we handle.
546 addLegalFPImmediate(APFloat(+0.0)); // xorpd
547 addLegalFPImmediate(APFloat(+0.0f)); // xorps
548 } else if (UseX87 && X86ScalarSSEf32) {
549 // Use SSE for f32, x87 for f64.
550 // Set up the FP register classes.
551 addRegisterClass(MVT::f32, &X86::FR32RegClass);
552 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
553
554 // Use ANDPS to simulate FABS.
555 setOperationAction(ISD::FABS , MVT::f32, Custom);
556
557 // Use XORP to simulate FNEG.
558 setOperationAction(ISD::FNEG , MVT::f32, Custom);
559
560 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
561
562 // Use ANDPS and ORPS to simulate FCOPYSIGN.
563 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
564 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
565
566 // We don't support sin/cos/fmod
567 setOperationAction(ISD::FSIN , MVT::f32, Expand);
568 setOperationAction(ISD::FCOS , MVT::f32, Expand);
569 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
570
571 // Special cases we handle for FP constants.
572 addLegalFPImmediate(APFloat(+0.0f)); // xorps
573 addLegalFPImmediate(APFloat(+0.0)); // FLD0
574 addLegalFPImmediate(APFloat(+1.0)); // FLD1
575 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
576 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
577
578 // Always expand sin/cos functions even though x87 has an instruction.
579 setOperationAction(ISD::FSIN , MVT::f64, Expand);
580 setOperationAction(ISD::FCOS , MVT::f64, Expand);
581 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
582 } else if (UseX87) {
583 // f32 and f64 in x87.
584 // Set up the FP register classes.
585 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
586 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
587
588 for (auto VT : { MVT::f32, MVT::f64 }) {
589 setOperationAction(ISD::UNDEF, VT, Expand);
590 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
591
592 // Always expand sin/cos functions even though x87 has an instruction.
593 setOperationAction(ISD::FSIN , VT, Expand);
594 setOperationAction(ISD::FCOS , VT, Expand);
595 setOperationAction(ISD::FSINCOS, VT, Expand);
596 }
597 addLegalFPImmediate(APFloat(+0.0)); // FLD0
598 addLegalFPImmediate(APFloat(+1.0)); // FLD1
599 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
600 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
601 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
602 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
603 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
604 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
605 }
606
607 // We don't support FMA.
608 setOperationAction(ISD::FMA, MVT::f64, Expand);
609 setOperationAction(ISD::FMA, MVT::f32, Expand);
610
611 // Long double always uses X87, except f128 in MMX.
612 if (UseX87) {
613 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
614 addRegisterClass(MVT::f128, &X86::FR128RegClass);
615 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
616 setOperationAction(ISD::FABS , MVT::f128, Custom);
617 setOperationAction(ISD::FNEG , MVT::f128, Custom);
618 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
619 }
620
621 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
622 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
623 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
624 {
625 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
626 addLegalFPImmediate(TmpFlt); // FLD0
627 TmpFlt.changeSign();
628 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
629
630 bool ignored;
631 APFloat TmpFlt2(+1.0);
632 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
633 &ignored);
634 addLegalFPImmediate(TmpFlt2); // FLD1
635 TmpFlt2.changeSign();
636 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
637 }
638
639 // Always expand sin/cos functions even though x87 has an instruction.
640 setOperationAction(ISD::FSIN , MVT::f80, Expand);
641 setOperationAction(ISD::FCOS , MVT::f80, Expand);
642 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
643
644 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
645 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
646 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
647 setOperationAction(ISD::FRINT, MVT::f80, Expand);
648 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
649 setOperationAction(ISD::FMA, MVT::f80, Expand);
650 }
651
652 // Always use a library call for pow.
653 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
654 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
655 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
656
657 setOperationAction(ISD::FLOG, MVT::f80, Expand);
658 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
659 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
660 setOperationAction(ISD::FEXP, MVT::f80, Expand);
661 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
662 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
663 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
664
665 // Some FP actions are always expanded for vector types.
666 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
667 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
668 setOperationAction(ISD::FSIN, VT, Expand);
669 setOperationAction(ISD::FSINCOS, VT, Expand);
670 setOperationAction(ISD::FCOS, VT, Expand);
671 setOperationAction(ISD::FREM, VT, Expand);
672 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
673 setOperationAction(ISD::FPOW, VT, Expand);
674 setOperationAction(ISD::FLOG, VT, Expand);
675 setOperationAction(ISD::FLOG2, VT, Expand);
676 setOperationAction(ISD::FLOG10, VT, Expand);
677 setOperationAction(ISD::FEXP, VT, Expand);
678 setOperationAction(ISD::FEXP2, VT, Expand);
679 }
680
681 // First set operation action for all vector types to either promote
682 // (for widening) or expand (for scalarization). Then we will selectively
683 // turn on ones that can be effectively codegen'd.
684 for (MVT VT : MVT::vector_valuetypes()) {
685 setOperationAction(ISD::SDIV, VT, Expand);
686 setOperationAction(ISD::UDIV, VT, Expand);
687 setOperationAction(ISD::SREM, VT, Expand);
688 setOperationAction(ISD::UREM, VT, Expand);
689 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
690 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
691 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
692 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
693 setOperationAction(ISD::FMA, VT, Expand);
694 setOperationAction(ISD::FFLOOR, VT, Expand);
695 setOperationAction(ISD::FCEIL, VT, Expand);
696 setOperationAction(ISD::FTRUNC, VT, Expand);
697 setOperationAction(ISD::FRINT, VT, Expand);
698 setOperationAction(ISD::FNEARBYINT, VT, Expand);
699 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
700 setOperationAction(ISD::MULHS, VT, Expand);
701 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
702 setOperationAction(ISD::MULHU, VT, Expand);
703 setOperationAction(ISD::SDIVREM, VT, Expand);
704 setOperationAction(ISD::UDIVREM, VT, Expand);
705 setOperationAction(ISD::CTPOP, VT, Expand);
706 setOperationAction(ISD::CTTZ, VT, Expand);
707 setOperationAction(ISD::CTLZ, VT, Expand);
708 setOperationAction(ISD::ROTL, VT, Expand);
709 setOperationAction(ISD::ROTR, VT, Expand);
710 setOperationAction(ISD::BSWAP, VT, Expand);
711 setOperationAction(ISD::SETCC, VT, Expand);
712 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
713 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
714 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
715 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
716 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
717 setOperationAction(ISD::TRUNCATE, VT, Expand);
718 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
719 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
720 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
721 setOperationAction(ISD::SELECT_CC, VT, Expand);
722 for (MVT InnerVT : MVT::vector_valuetypes()) {
723 setTruncStoreAction(InnerVT, VT, Expand);
724
725 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
726 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
727
728 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
729 // types, we have to deal with them whether we ask for Expansion or not.
730 // Setting Expand causes its own optimisation problems though, so leave
731 // them legal.
732 if (VT.getVectorElementType() == MVT::i1)
733 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
734
735 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
736 // split/scalarized right now.
737 if (VT.getVectorElementType() == MVT::f16)
738 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
739 }
740 }
741
742 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
743 // with -msoft-float, disable use of MMX as well.
744 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
745 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
746 // No operations on x86mmx supported, everything uses intrinsics.
747 }
748
749 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
750 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
751 : &X86::VR128RegClass);
752
753 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
754 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
755 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
756 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
757 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
758 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
759 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
760 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
761 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
762 }
763
764 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
765 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
766 : &X86::VR128RegClass);
767
768 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
769 // registers cannot be used even for integer operations.
770 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
771 : &X86::VR128RegClass);
772 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
773 : &X86::VR128RegClass);
774 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
775 : &X86::VR128RegClass);
776 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
777 : &X86::VR128RegClass);
778
779 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
780 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
781 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
782 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
783 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
784 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
785 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
786 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
787 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
788 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
789 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
790 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
791 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
792
793 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
794 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
795 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
796 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
797
798 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
799 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
800 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
801
802 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
803 setOperationAction(ISD::SETCC, VT, Custom);
804 setOperationAction(ISD::CTPOP, VT, Custom);
805 setOperationAction(ISD::CTTZ, VT, Custom);
806 }
807
808 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
809 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
810 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
811 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
812 setOperationAction(ISD::VSELECT, VT, Custom);
813 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
814 }
815
816 // We support custom legalizing of sext and anyext loads for specific
817 // memory vector types which we can load as a scalar (or sequence of
818 // scalars) and extend in-register to a legal 128-bit vector type. For sext
819 // loads these must work with a single scalar load.
820 for (MVT VT : MVT::integer_vector_valuetypes()) {
821 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
822 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
823 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
824 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
825 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
826 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
827 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
828 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
829 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
830 }
831
832 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
833 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
834 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
835 setOperationAction(ISD::VSELECT, VT, Custom);
836
837 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
838 continue;
839
840 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
841 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
842 }
843
844 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
845 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
846 setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
847 setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
848 setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
849 setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
850 setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
851 }
852
853 // Custom lower v2i64 and v2f64 selects.
854 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
855 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
856
857 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
858 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
859
860 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
861 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
862
863 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
864
865 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
866 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
867
868 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
869 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
870
871 for (MVT VT : MVT::fp_vector_valuetypes())
872 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
873
874 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
875 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
876 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
877
878 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
879 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
880 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
881
882 // In the customized shift lowering, the legal v4i32/v2i64 cases
883 // in AVX2 will be recognized.
884 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
885 setOperationAction(ISD::SRL, VT, Custom);
886 setOperationAction(ISD::SHL, VT, Custom);
887 setOperationAction(ISD::SRA, VT, Custom);
888 }
889 }
890
891 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
892 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
893 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
894 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
895 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
896 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
897 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
898 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
899 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
900 }
901
902 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
903 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
904 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
905 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
906 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
907 setOperationAction(ISD::FRINT, RoundedTy, Legal);
908 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
909 }
910
911 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
912 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
913 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
914 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
915 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
916 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
917 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
918 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
919
920 // FIXME: Do we need to handle scalar-to-vector here?
921 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
922
923 // We directly match byte blends in the backend as they match the VSELECT
924 // condition form.
925 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
926
927 // SSE41 brings specific instructions for doing vector sign extend even in
928 // cases where we don't have SRA.
929 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
930 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
931 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
932 }
933
934 for (MVT VT : MVT::integer_vector_valuetypes()) {
935 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
936 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
937 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
938 }
939
940 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
941 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
942 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
943 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
944 setLoadExtAction(LoadExtOp, MVT::v2i32, MVT::v2i8, Legal);
945 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
946 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
947 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
948 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
949 }
950
951 // i8 vectors are custom because the source register and source
952 // memory operand types are not the same width.
953 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
954 }
955
956 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
957 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
958 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
959 setOperationAction(ISD::ROTL, VT, Custom);
960
961 // XOP can efficiently perform BITREVERSE with VPPERM.
962 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
963 setOperationAction(ISD::BITREVERSE, VT, Custom);
964
965 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
966 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
967 setOperationAction(ISD::BITREVERSE, VT, Custom);
968 }
969
970 if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
971 bool HasInt256 = Subtarget.hasInt256();
972
973 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
974 : &X86::VR256RegClass);
975 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
976 : &X86::VR256RegClass);
977 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
978 : &X86::VR256RegClass);
979 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
980 : &X86::VR256RegClass);
981 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
982 : &X86::VR256RegClass);
983 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
984 : &X86::VR256RegClass);
985
986 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
987 setOperationAction(ISD::FFLOOR, VT, Legal);
988 setOperationAction(ISD::FCEIL, VT, Legal);
989 setOperationAction(ISD::FTRUNC, VT, Legal);
990 setOperationAction(ISD::FRINT, VT, Legal);
991 setOperationAction(ISD::FNEARBYINT, VT, Legal);
992 setOperationAction(ISD::FNEG, VT, Custom);
993 setOperationAction(ISD::FABS, VT, Custom);
994 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
995 }
996
997 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
998 // even though v8i16 is a legal type.
999 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1000 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1001 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1002
1003 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1004 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1005
1006 for (MVT VT : MVT::fp_vector_valuetypes())
1007 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1008
1009 // In the customized shift lowering, the legal v8i32/v4i64 cases
1010 // in AVX2 will be recognized.
1011 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1012 setOperationAction(ISD::SRL, VT, Custom);
1013 setOperationAction(ISD::SHL, VT, Custom);
1014 setOperationAction(ISD::SRA, VT, Custom);
1015 }
1016
1017 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1018 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1019 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1020
1021 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1022 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1023 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1024 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1025 }
1026
1027 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1028 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1029 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1030 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1031
1032 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1033 setOperationAction(ISD::SETCC, VT, Custom);
1034 setOperationAction(ISD::CTPOP, VT, Custom);
1035 setOperationAction(ISD::CTTZ, VT, Custom);
1036 setOperationAction(ISD::CTLZ, VT, Custom);
1037 }
1038
1039 if (Subtarget.hasAnyFMA()) {
1040 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1041 MVT::v2f64, MVT::v4f64 })
1042 setOperationAction(ISD::FMA, VT, Legal);
1043 }
1044
1045 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1046 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1047 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1048 }
1049
1050 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1051 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1052 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1053 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1054
1055 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1056 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1057
1058 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1059 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1060 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1061 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1062
1063 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1064 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1065 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1066 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1067 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1068 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1069 }
1070
1071 if (HasInt256) {
1072 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1073 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
1074 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
1075
1076 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1077 // when we have a 256bit-wide blend with immediate.
1078 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1079
1080 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1081 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1082 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1083 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1084 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1085 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1086 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1087 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1088 }
1089 }
1090
1091 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1092 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1093 setOperationAction(ISD::MLOAD, VT, Legal);
1094 setOperationAction(ISD::MSTORE, VT, Legal);
1095 }
1096
1097 // Extract subvector is special because the value type
1098 // (result) is 128-bit but the source is 256-bit wide.
1099 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1100 MVT::v4f32, MVT::v2f64 }) {
1101 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1102 }
1103
1104 // Custom lower several nodes for 256-bit types.
1105 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1106 MVT::v8f32, MVT::v4f64 }) {
1107 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1108 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1109 setOperationAction(ISD::VSELECT, VT, Custom);
1110 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1111 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1112 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1113 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1114 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1115 }
1116
1117 if (HasInt256)
1118 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1119
1120 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1121 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1122 setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
1123 setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
1124 setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
1125 setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1126 setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
1127 }
1128
1129 if (HasInt256) {
1130 // Custom legalize 2x32 to get a little better code.
1131 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1132 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1133
1134 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1135 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1136 setOperationAction(ISD::MGATHER, VT, Custom);
1137 }
1138 }
1139
1140 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1141 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1142 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1143 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1144 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1145
1146 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1147 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1148 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1149
1150 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1151 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1152 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1153
1154 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v16i1, MVT::v16i32);
1155 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v16i1, MVT::v16i32);
1156 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i1, MVT::v8i32);
1157 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i1, MVT::v8i32);
1158 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i1, MVT::v4i32);
1159 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i1, MVT::v4i32);
1160 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
1161 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
1162
1163 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i1, MVT::v16i32);
1164 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i1, MVT::v16i32);
1165 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1166 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1167 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1168 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1169 if (Subtarget.hasVLX()) {
1170 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1171 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1172 }
1173
1174 // Extends of v16i1/v8i1 to 128-bit vectors.
1175 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1176 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i8, Custom);
1177 setOperationAction(ISD::ANY_EXTEND, MVT::v16i8, Custom);
1178 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1179 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i16, Custom);
1180 setOperationAction(ISD::ANY_EXTEND, MVT::v8i16, Custom);
1181
1182 for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
1183 setOperationAction(ISD::ADD, VT, Custom);
1184 setOperationAction(ISD::SUB, VT, Custom);
1185 setOperationAction(ISD::MUL, VT, Custom);
1186 setOperationAction(ISD::SETCC, VT, Custom);
1187 setOperationAction(ISD::SELECT, VT, Custom);
1188 setOperationAction(ISD::TRUNCATE, VT, Custom);
1189
1190 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1191 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1192 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1193 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1194 setOperationAction(ISD::VSELECT, VT, Expand);
1195 }
1196
1197 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1198 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1199 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1200 for (auto VT : { MVT::v1i1, MVT::v8i1 })
1201 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1202
1203 for (MVT VT : MVT::fp_vector_valuetypes())
1204 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1205
1206 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1207 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1208 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1209 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1210 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1211 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1212 }
1213
1214 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
1215 MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
1216 MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
1217 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
1218 setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
1219 setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
1220 setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
1221 setTruncStoreAction(VT, MaskVT, Custom);
1222 }
1223
1224 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1225 setOperationAction(ISD::FNEG, VT, Custom);
1226 setOperationAction(ISD::FABS, VT, Custom);
1227 setOperationAction(ISD::FMA, VT, Legal);
1228 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1229 }
1230
1231 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1232 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i16, MVT::v16i32);
1233 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v16i8, MVT::v16i32);
1234 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1235 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i8, MVT::v16i32);
1236 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v16i16, MVT::v16i32);
1237 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1238 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1239
1240 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1241 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1242 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1243 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1244 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1245
1246 if (!Subtarget.hasVLX()) {
1247 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1248 // to 512-bit rather than use the AVX2 instructions so that we can use
1249 // k-masks.
1250 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1251 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1252 setOperationAction(ISD::MLOAD, VT, Custom);
1253 setOperationAction(ISD::MSTORE, VT, Custom);
1254 }
1255 }
1256
1257 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1258 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1259 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1260 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1261 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1262 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1263 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1264 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1265
1266 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1267 setOperationAction(ISD::FFLOOR, VT, Legal);
1268 setOperationAction(ISD::FCEIL, VT, Legal);
1269 setOperationAction(ISD::FTRUNC, VT, Legal);
1270 setOperationAction(ISD::FRINT, VT, Legal);
1271 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1272 }
1273
1274 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1275 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1276
1277 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1278 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1279 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1280
1281 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1282 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1283 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1284 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1285
1286 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1287 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1288
1289 setOperationAction(ISD::UMUL_LOHI, MVT::v16i32, Custom);
1290 setOperationAction(ISD::SMUL_LOHI, MVT::v16i32, Custom);
1291
1292 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1293 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1294 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1295
1296 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1297 setOperationAction(ISD::SMAX, VT, Legal);
1298 setOperationAction(ISD::UMAX, VT, Legal);
1299 setOperationAction(ISD::SMIN, VT, Legal);
1300 setOperationAction(ISD::UMIN, VT, Legal);
1301 setOperationAction(ISD::ABS, VT, Legal);
1302 setOperationAction(ISD::SRL, VT, Custom);
1303 setOperationAction(ISD::SHL, VT, Custom);
1304 setOperationAction(ISD::SRA, VT, Custom);
1305 setOperationAction(ISD::CTPOP, VT, Custom);
1306 setOperationAction(ISD::CTTZ, VT, Custom);
1307 setOperationAction(ISD::ROTL, VT, Custom);
1308 setOperationAction(ISD::ROTR, VT, Custom);
1309 }
1310
1311 // Need to promote to 64-bit even though we have 32-bit masked instructions
1312 // because the IR optimizers rearrange bitcasts around logic ops leaving
1313 // too many variations to handle if we don't promote them.
1314 setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
1315 setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
1316 setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
1317
1318 if (Subtarget.hasDQI()) {
1319 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1320 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1321 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1322 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1323 }
1324
1325 if (Subtarget.hasCDI()) {
1326 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1327 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1328 setOperationAction(ISD::CTLZ, VT, Legal);
1329 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1330 }
1331 } // Subtarget.hasCDI()
1332
1333 if (Subtarget.hasVPOPCNTDQ()) {
1334 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1335 setOperationAction(ISD::CTPOP, VT, Legal);
1336 }
1337
1338 // Extract subvector is special because the value type
1339 // (result) is 256-bit but the source is 512-bit wide.
1340 // 128-bit was made Legal under AVX1.
1341 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1342 MVT::v8f32, MVT::v4f64 })
1343 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1344
1345 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1346 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1347 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1348 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1349 setOperationAction(ISD::VSELECT, VT, Custom);
1350 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1351 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1352 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1353 setOperationAction(ISD::MLOAD, VT, Legal);
1354 setOperationAction(ISD::MSTORE, VT, Legal);
1355 setOperationAction(ISD::MGATHER, VT, Custom);
1356 setOperationAction(ISD::MSCATTER, VT, Custom);
1357 }
1358 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1359 setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1360 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
1361 }
1362 }// has AVX-512
1363
1364 if (!Subtarget.useSoftFloat() &&
1365 (Subtarget.hasAVX512() || Subtarget.hasVLX())) {
1366 // These operations are handled on non-VLX by artificially widening in
1367 // isel patterns.
1368 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1369
1370 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1371 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1372 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1373 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1374 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1375
1376 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1377 setOperationAction(ISD::SMAX, VT, Legal);
1378 setOperationAction(ISD::UMAX, VT, Legal);
1379 setOperationAction(ISD::SMIN, VT, Legal);
1380 setOperationAction(ISD::UMIN, VT, Legal);
1381 setOperationAction(ISD::ABS, VT, Legal);
1382 }
1383
1384 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1385 setOperationAction(ISD::ROTL, VT, Custom);
1386 setOperationAction(ISD::ROTR, VT, Custom);
1387 }
1388
1389 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1390 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1391 setOperationAction(ISD::MSCATTER, VT, Custom);
1392
1393 if (Subtarget.hasDQI()) {
1394 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1395 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1396 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1397 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1398 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1399 }
1400 }
1401
1402 if (Subtarget.hasCDI()) {
1403 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1404 setOperationAction(ISD::CTLZ, VT, Legal);
1405 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1406 }
1407 } // Subtarget.hasCDI()
1408
1409 if (Subtarget.hasVPOPCNTDQ()) {
1410 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1411 setOperationAction(ISD::CTPOP, VT, Legal);
1412 }
1413 }
1414
1415 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1416 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1417 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1418
1419 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1420 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1421
1422 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1423 setOperationAction(ISD::ADD, VT, Custom);
1424 setOperationAction(ISD::SUB, VT, Custom);
1425 setOperationAction(ISD::MUL, VT, Custom);
1426 setOperationAction(ISD::VSELECT, VT, Expand);
1427
1428 setOperationAction(ISD::TRUNCATE, VT, Custom);
1429 setOperationAction(ISD::SETCC, VT, Custom);
1430 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1431 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1432 setOperationAction(ISD::SELECT, VT, Custom);
1433 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1434 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1435 }
1436
1437 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1438 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1439 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1440 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1441 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1442 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1443
1444 // Extends from v32i1 masks to 256-bit vectors.
1445 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1446 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1447 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1448 // Extends from v64i1 masks to 512-bit vectors.
1449 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1450 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1451 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1452
1453 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1454 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1455 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1456 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1457 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1458 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1459 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1460 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1461 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1462 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1463 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1464 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1465 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1466 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1467 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1468 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1469 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1470 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1471 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1472 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1473 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1474 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1475 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1476
1477 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1478
1479 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1480
1481 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1482 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1483 setOperationAction(ISD::VSELECT, VT, Custom);
1484 setOperationAction(ISD::ABS, VT, Legal);
1485 setOperationAction(ISD::SRL, VT, Custom);
1486 setOperationAction(ISD::SHL, VT, Custom);
1487 setOperationAction(ISD::SRA, VT, Custom);
1488 setOperationAction(ISD::MLOAD, VT, Legal);
1489 setOperationAction(ISD::MSTORE, VT, Legal);
1490 setOperationAction(ISD::CTPOP, VT, Custom);
1491 setOperationAction(ISD::CTTZ, VT, Custom);
1492 setOperationAction(ISD::CTLZ, VT, Custom);
1493 setOperationAction(ISD::SMAX, VT, Legal);
1494 setOperationAction(ISD::UMAX, VT, Legal);
1495 setOperationAction(ISD::SMIN, VT, Legal);
1496 setOperationAction(ISD::UMIN, VT, Legal);
1497
1498 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
1499 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
1500 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
1501 }
1502
1503 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1504 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1505 }
1506
1507 if (Subtarget.hasBITALG()) {
1508 for (auto VT : { MVT::v64i8, MVT::v32i16 })
1509 setOperationAction(ISD::CTPOP, VT, Legal);
1510 }
1511 }
1512
1513 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI() &&
1514 (Subtarget.hasAVX512() || Subtarget.hasVLX())) {
1515 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1516 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1517 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1518 }
1519
1520 // These operations are handled on non-VLX by artificially widening in
1521 // isel patterns.
1522 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1523
1524 if (Subtarget.hasBITALG()) {
1525 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1526 setOperationAction(ISD::CTPOP, VT, Legal);
1527 }
1528 }
1529
1530 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1531 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1532 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1533
1534 for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
1535 setOperationAction(ISD::ADD, VT, Custom);
1536 setOperationAction(ISD::SUB, VT, Custom);
1537 setOperationAction(ISD::MUL, VT, Custom);
1538 setOperationAction(ISD::VSELECT, VT, Expand);
1539
1540 setOperationAction(ISD::TRUNCATE, VT, Custom);
1541 setOperationAction(ISD::SETCC, VT, Custom);
1542 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1543 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1544 setOperationAction(ISD::SELECT, VT, Custom);
1545 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1546 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1547 }
1548
1549 // TODO: v8i1 concat should be legal without VLX to support concats of
1550 // v1i1, but we won't legalize it correctly currently without introducing
1551 // a v4i1 concat in the middle.
1552 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1553 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1554 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1555 for (auto VT : { MVT::v2i1, MVT::v4i1 })
1556 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1557
1558 // Extends from v2i1/v4i1 masks to 128-bit vectors.
1559 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
1560 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
1561 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1562 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1563 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Custom);
1564 setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Custom);
1565
1566 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1567 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1568 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1569 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1570 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1571
1572 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1573 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1574 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1575 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1576 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1577
1578 if (Subtarget.hasDQI()) {
1579 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1580 // v2f32 UINT_TO_FP is already custom under SSE2.
1581 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1582 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 1583, __extension__ __PRETTY_FUNCTION__))
1583 "Unexpected operation action!")(static_cast <bool> (isOperationCustom(ISD::UINT_TO_FP,
MVT::v2f32) && "Unexpected operation action!") ? void
(0) : __assert_fail ("isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && \"Unexpected operation action!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 1583, __extension__ __PRETTY_FUNCTION__))
;
1584 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1585 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1586 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1587 }
1588
1589 if (Subtarget.hasBWI()) {
1590 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1591 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1592 }
1593 }
1594
1595 // We want to custom lower some of our intrinsics.
1596 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1597 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1598 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1599 if (!Subtarget.is64Bit()) {
1600 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1601 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1602 }
1603
1604 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1605 // handle type legalization for these operations here.
1606 //
1607 // FIXME: We really should do custom legalization for addition and
1608 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1609 // than generic legalization for 64-bit multiplication-with-overflow, though.
1610 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1611 if (VT == MVT::i64 && !Subtarget.is64Bit())
1612 continue;
1613 // Add/Sub/Mul with overflow operations are custom lowered.
1614 setOperationAction(ISD::SADDO, VT, Custom);
1615 setOperationAction(ISD::UADDO, VT, Custom);
1616 setOperationAction(ISD::SSUBO, VT, Custom);
1617 setOperationAction(ISD::USUBO, VT, Custom);
1618 setOperationAction(ISD::SMULO, VT, Custom);
1619 setOperationAction(ISD::UMULO, VT, Custom);
1620
1621 // Support carry in as value rather than glue.
1622 setOperationAction(ISD::ADDCARRY, VT, Custom);
1623 setOperationAction(ISD::SUBCARRY, VT, Custom);
1624 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1625 }
1626
1627 if (!Subtarget.is64Bit()) {
1628 // These libcalls are not available in 32-bit.
1629 setLibcallName(RTLIB::SHL_I128, nullptr);
1630 setLibcallName(RTLIB::SRL_I128, nullptr);
1631 setLibcallName(RTLIB::SRA_I128, nullptr);
1632 setLibcallName(RTLIB::MUL_I128, nullptr);
1633 }
1634
1635 // Combine sin / cos into _sincos_stret if it is available.
1636 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1637 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1638 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1639 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1640 }
1641
1642 if (Subtarget.isTargetWin64()) {
1643 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1644 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1645 setOperationAction(ISD::SREM, MVT::i128, Custom);
1646 setOperationAction(ISD::UREM, MVT::i128, Custom);
1647 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1648 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1649 }
1650
1651 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1652 // is. We should promote the value to 64-bits to solve this.
1653 // This is what the CRT headers do - `fmodf` is an inline header
1654 // function casting to f64 and calling `fmod`.
1655 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1656 Subtarget.isTargetWindowsItanium()))
1657 for (ISD::NodeType Op :
1658 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1659 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1660 if (isOperationExpand(Op, MVT::f32))
1661 setOperationAction(Op, MVT::f32, Promote);
1662
1663 // We have target-specific dag combine patterns for the following nodes:
1664 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1665 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1666 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1667 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
1668 setTargetDAGCombine(ISD::BITCAST);
1669 setTargetDAGCombine(ISD::VSELECT);
1670 setTargetDAGCombine(ISD::SELECT);
1671 setTargetDAGCombine(ISD::SHL);
1672 setTargetDAGCombine(ISD::SRA);
1673 setTargetDAGCombine(ISD::SRL);
1674 setTargetDAGCombine(ISD::OR);
1675 setTargetDAGCombine(ISD::AND);
1676 setTargetDAGCombine(ISD::ADD);
1677 setTargetDAGCombine(ISD::FADD);
1678 setTargetDAGCombine(ISD::FSUB);
1679 setTargetDAGCombine(ISD::FNEG);
1680 setTargetDAGCombine(ISD::FMA);
1681 setTargetDAGCombine(ISD::FMINNUM);
1682 setTargetDAGCombine(ISD::FMAXNUM);
1683 setTargetDAGCombine(ISD::SUB);
1684 setTargetDAGCombine(ISD::LOAD);
1685 setTargetDAGCombine(ISD::MLOAD);
1686 setTargetDAGCombine(ISD::STORE);
1687 setTargetDAGCombine(ISD::MSTORE);
1688 setTargetDAGCombine(ISD::TRUNCATE);
1689 setTargetDAGCombine(ISD::ZERO_EXTEND);
1690 setTargetDAGCombine(ISD::ANY_EXTEND);
1691 setTargetDAGCombine(ISD::SIGN_EXTEND);
1692 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1693 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1694 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1695 setTargetDAGCombine(ISD::SINT_TO_FP);
1696 setTargetDAGCombine(ISD::UINT_TO_FP);
1697 setTargetDAGCombine(ISD::SETCC);
1698 setTargetDAGCombine(ISD::MUL);
1699 setTargetDAGCombine(ISD::XOR);
1700 setTargetDAGCombine(ISD::MSCATTER);
1701 setTargetDAGCombine(ISD::MGATHER);
1702
1703 computeRegisterProperties(Subtarget.getRegisterInfo());
1704
1705 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1706 MaxStoresPerMemsetOptSize = 8;
1707 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1708 MaxStoresPerMemcpyOptSize = 4;
1709 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1710 MaxStoresPerMemmoveOptSize = 4;
1711
1712 // TODO: These control memcmp expansion in CGP and could be raised higher, but
1713 // that needs to benchmarked and balanced with the potential use of vector
1714 // load/store types (PR33329, PR33914).
1715 MaxLoadsPerMemcmp = 2;
1716 MaxLoadsPerMemcmpOptSize = 2;
1717
1718 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1719 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1720
1721 // An out-of-order CPU can speculatively execute past a predictable branch,
1722 // but a conditional move could be stalled by an expensive earlier operation.
1723 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1724 EnableExtLdPromotion = true;
1725 setPrefFunctionAlignment(4); // 2^4 bytes.
1726
1727 verifyIntrinsicTables();
1728}
1729
1730// This has so far only been implemented for 64-bit MachO.
1731bool X86TargetLowering::useLoadStackGuardNode() const {
1732 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1733}
1734
1735bool X86TargetLowering::useStackGuardXorFP() const {
1736 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
1737 return Subtarget.getTargetTriple().isOSMSVCRT();
1738}
1739
1740SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1741 const SDLoc &DL) const {
1742 EVT PtrTy = getPointerTy(DAG.getDataLayout());
1743 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
1744 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
1745 return SDValue(Node, 0);
1746}
1747
1748TargetLoweringBase::LegalizeTypeAction
1749X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1750 if (ExperimentalVectorWideningLegalization &&
1751 VT.getVectorNumElements() != 1 &&
1752 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1753 return TypeWidenVector;
1754
1755 return TargetLoweringBase::getPreferredVectorAction(VT);
1756}
1757
1758EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1759 LLVMContext& Context,
1760 EVT VT) const {
1761 if (!VT.isVector())
1762 return MVT::i8;
1763
1764 if (Subtarget.hasAVX512()) {
1765 const unsigned NumElts = VT.getVectorNumElements();
1766
1767 // Figure out what this type will be legalized to.
1768 EVT LegalVT = VT;
1769 while (getTypeAction(Context, LegalVT) != TypeLegal)
1770 LegalVT = getTypeToTransformTo(Context, LegalVT);
1771
1772 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
1773 if (LegalVT.getSimpleVT().is512BitVector())
1774 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1775
1776 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
1777 // If we legalized to less than a 512-bit vector, then we will use a vXi1
1778 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
1779 // vXi16/vXi8.
1780 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
1781 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
1782 return EVT::getVectorVT(Context, MVT::i1, NumElts);
1783 }
1784 }
1785
1786 return VT.changeVectorElementTypeToInteger();
1787}
1788
1789/// Helper for getByValTypeAlignment to determine
1790/// the desired ByVal argument alignment.
1791static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1792 if (MaxAlign == 16)
1793 return;
1794 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1795 if (VTy->getBitWidth() == 128)
1796 MaxAlign = 16;
1797 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1798 unsigned EltAlign = 0;
1799 getMaxByValAlign(ATy->getElementType(), EltAlign);
1800 if (EltAlign > MaxAlign)
1801 MaxAlign = EltAlign;
1802 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1803 for (auto *EltTy : STy->elements()) {
1804 unsigned EltAlign = 0;
1805 getMaxByValAlign(EltTy, EltAlign);
1806 if (EltAlign > MaxAlign)
1807 MaxAlign = EltAlign;
1808 if (MaxAlign == 16)
1809 break;
1810 }
1811 }
1812}
1813
1814/// Return the desired alignment for ByVal aggregate
1815/// function arguments in the caller parameter area. For X86, aggregates
1816/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1817/// are at 4-byte boundaries.
1818unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1819 const DataLayout &DL) const {
1820 if (Subtarget.is64Bit()) {
1821 // Max of 8 and alignment of type.
1822 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1823 if (TyAlign > 8)
1824 return TyAlign;
1825 return 8;
1826 }
1827
1828 unsigned Align = 4;
1829 if (Subtarget.hasSSE1())
1830 getMaxByValAlign(Ty, Align);
1831 return Align;
1832}
1833
1834/// Returns the target specific optimal type for load
1835/// and store operations as a result of memset, memcpy, and memmove
1836/// lowering. If DstAlign is zero that means it's safe to destination
1837/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1838/// means there isn't a need to check it against alignment requirement,
1839/// probably because the source does not need to be loaded. If 'IsMemset' is
1840/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1841/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1842/// source is constant so it does not need to be loaded.
1843/// It returns EVT::Other if the type should be determined using generic
1844/// target-independent logic.
1845EVT
1846X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1847 unsigned DstAlign, unsigned SrcAlign,
1848 bool IsMemset, bool ZeroMemset,
1849 bool MemcpyStrSrc,
1850 MachineFunction &MF) const {
1851 const Function &F = MF.getFunction();
1852 if (!F.hasFnAttribute(Attribute::NoImplicitFloat)) {
1853 if (Size >= 16 &&
1854 (!Subtarget.isUnalignedMem16Slow() ||
1855 ((DstAlign == 0 || DstAlign >= 16) &&
1856 (SrcAlign == 0 || SrcAlign >= 16)))) {
1857 // FIXME: Check if unaligned 32-byte accesses are slow.
1858 if (Size >= 32 && Subtarget.hasAVX()) {
1859 // Although this isn't a well-supported type for AVX1, we'll let
1860 // legalization and shuffle lowering produce the optimal codegen. If we
1861 // choose an optimal type with a vector element larger than a byte,
1862 // getMemsetStores() may create an intermediate splat (using an integer
1863 // multiply) before we splat as a vector.
1864 return MVT::v32i8;
1865 }
1866 if (Subtarget.hasSSE2())
1867 return MVT::v16i8;
1868 // TODO: Can SSE1 handle a byte vector?
1869 if (Subtarget.hasSSE1())
1870 return MVT::v4f32;
1871 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1872 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1873 // Do not use f64 to lower memcpy if source is string constant. It's
1874 // better to use i32 to avoid the loads.
1875 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1876 // The gymnastics of splatting a byte value into an XMM register and then
1877 // only using 8-byte stores (because this is a CPU with slow unaligned
1878 // 16-byte accesses) makes that a loser.
1879 return MVT::f64;
1880 }
1881 }
1882 // This is a compromise. If we reach here, unaligned accesses may be slow on
1883 // this target. However, creating smaller, aligned accesses could be even
1884 // slower and would certainly be a lot more code.
1885 if (Subtarget.is64Bit() && Size >= 8)
1886 return MVT::i64;
1887 return MVT::i32;
1888}
1889
1890bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1891 if (VT == MVT::f32)
1892 return X86ScalarSSEf32;
1893 else if (VT == MVT::f64)
1894 return X86ScalarSSEf64;
1895 return true;
1896}
1897
1898bool
1899X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1900 unsigned,
1901 unsigned,
1902 bool *Fast) const {
1903 if (Fast) {
1904 switch (VT.getSizeInBits()) {
1905 default:
1906 // 8-byte and under are always assumed to be fast.
1907 *Fast = true;
1908 break;
1909 case 128:
1910 *Fast = !Subtarget.isUnalignedMem16Slow();
1911 break;
1912 case 256:
1913 *Fast = !Subtarget.isUnalignedMem32Slow();
1914 break;
1915 // TODO: What about AVX-512 (512-bit) accesses?
1916 }
1917 }
1918 // Misaligned accesses of any size are always allowed.
1919 return true;
1920}
1921
1922/// Return the entry encoding for a jump table in the
1923/// current function. The returned value is a member of the
1924/// MachineJumpTableInfo::JTEntryKind enum.
1925unsigned X86TargetLowering::getJumpTableEncoding() const {
1926 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1927 // symbol.
1928 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1929 return MachineJumpTableInfo::EK_Custom32;
1930
1931 // Otherwise, use the normal jump table encoding heuristics.
1932 return TargetLowering::getJumpTableEncoding();
1933}
1934
1935bool X86TargetLowering::useSoftFloat() const {
1936 return Subtarget.useSoftFloat();
1937}
1938
1939void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1940 ArgListTy &Args) const {
1941
1942 // Only relabel X86-32 for C / Stdcall CCs.
1943 if (Subtarget.is64Bit())
1944 return;
1945 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
1946 return;
1947 unsigned ParamRegs = 0;
1948 if (auto *M = MF->getFunction().getParent())
1949 ParamRegs = M->getNumberRegisterParameters();
1950
1951 // Mark the first N int arguments as having reg
1952 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
1953 Type *T = Args[Idx].Ty;
1954 if (T->isPointerTy() || T->isIntegerTy())
1955 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
1956 unsigned numRegs = 1;
1957 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
1958 numRegs = 2;
1959 if (ParamRegs < numRegs)
1960 return;
1961 ParamRegs -= numRegs;
1962 Args[Idx].IsInReg = true;
1963 }
1964 }
1965}
1966
1967const MCExpr *
1968X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1969 const MachineBasicBlock *MBB,
1970 unsigned uid,MCContext &Ctx) const{
1971 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())(static_cast <bool> (isPositionIndependent() &&
Subtarget.isPICStyleGOT()) ? void (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 1971, __extension__ __PRETTY_FUNCTION__))
;
1972 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1973 // entries.
1974 return MCSymbolRefExpr::create(MBB->getSymbol(),
1975 MCSymbolRefExpr::VK_GOTOFF, Ctx);
1976}
1977
1978/// Returns relocation base for the given PIC jumptable.
1979SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1980 SelectionDAG &DAG) const {
1981 if (!Subtarget.is64Bit())
1982 // This doesn't have SDLoc associated with it, but is not really the
1983 // same as a Register.
1984 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
1985 getPointerTy(DAG.getDataLayout()));
1986 return Table;
1987}
1988
1989/// This returns the relocation base for the given PIC jumptable,
1990/// the same as getPICJumpTableRelocBase, but as an MCExpr.
1991const MCExpr *X86TargetLowering::
1992getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
1993 MCContext &Ctx) const {
1994 // X86-64 uses RIP relative addressing based on the jump table label.
1995 if (Subtarget.isPICStyleRIPRel())
1996 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
1997
1998 // Otherwise, the reference is relative to the PIC base.
1999 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2000}
2001
2002std::pair<const TargetRegisterClass *, uint8_t>
2003X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2004 MVT VT) const {
2005 const TargetRegisterClass *RRC = nullptr;
2006 uint8_t Cost = 1;
2007 switch (VT.SimpleTy) {
2008 default:
2009 return TargetLowering::findRepresentativeClass(TRI, VT);
2010 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2011 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2012 break;
2013 case MVT::x86mmx:
2014 RRC = &X86::VR64RegClass;
2015 break;
2016 case MVT::f32: case MVT::f64:
2017 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2018 case MVT::v4f32: case MVT::v2f64:
2019 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2020 case MVT::v8f32: case MVT::v4f64:
2021 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2022 case MVT::v16f32: case MVT::v8f64:
2023 RRC = &X86::VR128XRegClass;
2024 break;
2025 }
2026 return std::make_pair(RRC, Cost);
2027}
2028
2029unsigned X86TargetLowering::getAddressSpace() const {
2030 if (Subtarget.is64Bit())
2031 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2032 return 256;
2033}
2034
2035static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2036 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2037 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2038}
2039
2040static Constant* SegmentOffset(IRBuilder<> &IRB,
2041 unsigned Offset, unsigned AddressSpace) {
2042 return ConstantExpr::getIntToPtr(
2043 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2044 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2045}
2046
2047Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2048 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2049 // tcbhead_t; use it instead of the usual global variable (see
2050 // sysdeps/{i386,x86_64}/nptl/tls.h)
2051 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2052 if (Subtarget.isTargetFuchsia()) {
2053 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2054 return SegmentOffset(IRB, 0x10, getAddressSpace());
2055 } else {
2056 // %fs:0x28, unless we're using a Kernel code model, in which case
2057 // it's %gs:0x28. gs:0x14 on i386.
2058 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2059 return SegmentOffset(IRB, Offset, getAddressSpace());
2060 }
2061 }
2062
2063 return TargetLowering::getIRStackGuard(IRB);
2064}
2065
2066void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2067 // MSVC CRT provides functionalities for stack protection.
2068 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
2069 // MSVC CRT has a global variable holding security cookie.
2070 M.getOrInsertGlobal("__security_cookie",
2071 Type::getInt8PtrTy(M.getContext()));
2072
2073 // MSVC CRT has a function to validate security cookie.
2074 auto *SecurityCheckCookie = cast<Function>(
2075 M.getOrInsertFunction("__security_check_cookie",
2076 Type::getVoidTy(M.getContext()),
2077 Type::getInt8PtrTy(M.getContext())));
2078 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2079 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2080 return;
2081 }
2082 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2083 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2084 return;
2085 TargetLowering::insertSSPDeclarations(M);
2086}
2087
2088Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2089 // MSVC CRT has a global variable holding security cookie.
2090 if (Subtarget.getTargetTriple().isOSMSVCRT())
2091 return M.getGlobalVariable("__security_cookie");
2092 return TargetLowering::getSDagStackGuard(M);
2093}
2094
2095Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2096 // MSVC CRT has a function to validate security cookie.
2097 if (Subtarget.getTargetTriple().isOSMSVCRT())
2098 return M.getFunction("__security_check_cookie");
2099 return TargetLowering::getSSPStackGuardCheck(M);
2100}
2101
2102Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2103 if (Subtarget.getTargetTriple().isOSContiki())
2104 return getDefaultSafeStackPointerLocation(IRB, false);
2105
2106 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2107 // definition of TLS_SLOT_SAFESTACK in
2108 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2109 if (Subtarget.isTargetAndroid()) {
2110 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2111 // %gs:0x24 on i386
2112 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2113 return SegmentOffset(IRB, Offset, getAddressSpace());
2114 }
2115
2116 // Fuchsia is similar.
2117 if (Subtarget.isTargetFuchsia()) {
2118 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2119 return SegmentOffset(IRB, 0x18, getAddressSpace());
2120 }
2121
2122 return TargetLowering::getSafeStackPointerLocation(IRB);
2123}
2124
2125bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2126 unsigned DestAS) const {
2127 assert(SrcAS != DestAS && "Expected different address spaces!")(static_cast <bool> (SrcAS != DestAS && "Expected different address spaces!"
) ? void (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2127, __extension__ __PRETTY_FUNCTION__))
;
2128
2129 return SrcAS < 256 && DestAS < 256;
2130}
2131
2132//===----------------------------------------------------------------------===//
2133// Return Value Calling Convention Implementation
2134//===----------------------------------------------------------------------===//
2135
2136#include "X86GenCallingConv.inc"
2137
2138bool X86TargetLowering::CanLowerReturn(
2139 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2140 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2141 SmallVector<CCValAssign, 16> RVLocs;
2142 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2143 return CCInfo.CheckReturn(Outs, RetCC_X86);
2144}
2145
2146const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2147 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2148 return ScratchRegs;
2149}
2150
2151/// Lowers masks values (v*i1) to the local register values
2152/// \returns DAG node after lowering to register type
2153static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2154 const SDLoc &Dl, SelectionDAG &DAG) {
2155 EVT ValVT = ValArg.getValueType();
2156
2157 if (ValVT == MVT::v1i1)
2158 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2159 DAG.getIntPtrConstant(0, Dl));
2160
2161 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2162 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2163 // Two stage lowering might be required
2164 // bitcast: v8i1 -> i8 / v16i1 -> i16
2165 // anyextend: i8 -> i32 / i16 -> i32
2166 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2167 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2168 if (ValLoc == MVT::i32)
2169 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2170 return ValToCopy;
2171 } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2172 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2173 // One stage lowering is required
2174 // bitcast: v32i1 -> i32 / v64i1 -> i64
2175 return DAG.getBitcast(ValLoc, ValArg);
2176 } else
2177 return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
2178}
2179
2180/// Breaks v64i1 value into two registers and adds the new node to the DAG
2181static void Passv64i1ArgInRegs(
2182 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2183 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2184 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2185 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")(static_cast <bool> (Subtarget.hasBWI() && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("Subtarget.hasBWI() && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2185, __extension__ __PRETTY_FUNCTION__))
;
2186 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2186, __extension__ __PRETTY_FUNCTION__))
;
2187 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")(static_cast <bool> (Arg.getValueType() == MVT::i64 &&
"Expecting 64 bit value") ? void (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2187, __extension__ __PRETTY_FUNCTION__))
;
2188 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2189, __extension__ __PRETTY_FUNCTION__))
2189 "The value should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The value should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2189, __extension__ __PRETTY_FUNCTION__))
;
2190
2191 // Before splitting the value we cast it to i64
2192 Arg = DAG.getBitcast(MVT::i64, Arg);
2193
2194 // Splitting the value into two i32 types
2195 SDValue Lo, Hi;
2196 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2197 DAG.getConstant(0, Dl, MVT::i32));
2198 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2199 DAG.getConstant(1, Dl, MVT::i32));
2200
2201 // Attach the two i32 types into corresponding registers
2202 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2203 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2204}
2205
2206SDValue
2207X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2208 bool isVarArg,
2209 const SmallVectorImpl<ISD::OutputArg> &Outs,
2210 const SmallVectorImpl<SDValue> &OutVals,
2211 const SDLoc &dl, SelectionDAG &DAG) const {
2212 MachineFunction &MF = DAG.getMachineFunction();
2213 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2214
2215 // In some cases we need to disable registers from the default CSR list.
2216 // For example, when they are used for argument passing.
2217 bool ShouldDisableCalleeSavedRegister =
2218 CallConv == CallingConv::X86_RegCall ||
2219 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2220
2221 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2222 report_fatal_error("X86 interrupts may not return any value");
2223
2224 SmallVector<CCValAssign, 16> RVLocs;
2225 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2226 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2227
2228 SDValue Flag;
2229 SmallVector<SDValue, 6> RetOps;
2230 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2231 // Operand #1 = Bytes To Pop
2232 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2233 MVT::i32));
2234
2235 // Copy the result values into the output registers.
2236 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2237 ++I, ++OutsIndex) {
2238 CCValAssign &VA = RVLocs[I];
2239 assert(VA.isRegLoc() && "Can only return in registers!")(static_cast <bool> (VA.isRegLoc() && "Can only return in registers!"
) ? void (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2239, __extension__ __PRETTY_FUNCTION__))
;
2240
2241 // Add the register to the CalleeSaveDisableRegs list.
2242 if (ShouldDisableCalleeSavedRegister)
2243 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2244
2245 SDValue ValToCopy = OutVals[OutsIndex];
2246 EVT ValVT = ValToCopy.getValueType();
2247
2248 // Promote values to the appropriate types.
2249 if (VA.getLocInfo() == CCValAssign::SExt)
2250 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2251 else if (VA.getLocInfo() == CCValAssign::ZExt)
2252 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2253 else if (VA.getLocInfo() == CCValAssign::AExt) {
2254 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2255 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2256 else
2257 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2258 }
2259 else if (VA.getLocInfo() == CCValAssign::BCvt)
2260 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2261
2262 assert(VA.getLocInfo() != CCValAssign::FPExt &&(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2263, __extension__ __PRETTY_FUNCTION__))
2263 "Unexpected FP-extend for return value.")(static_cast <bool> (VA.getLocInfo() != CCValAssign::FPExt
&& "Unexpected FP-extend for return value.") ? void (
0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2263, __extension__ __PRETTY_FUNCTION__))
;
2264
2265 // If this is x86-64, and we disabled SSE, we can't return FP values,
2266 // or SSE or MMX vectors.
2267 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2268 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2269 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2270 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2271 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2272 } else if (ValVT == MVT::f64 &&
2273 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2274 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2275 // llvm-gcc has never done it right and no one has noticed, so this
2276 // should be OK for now.
2277 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2278 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2279 }
2280
2281 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2282 // the RET instruction and handled by the FP Stackifier.
2283 if (VA.getLocReg() == X86::FP0 ||
2284 VA.getLocReg() == X86::FP1) {
2285 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2286 // change the value to the FP stack register class.
2287 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2288 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2289 RetOps.push_back(ValToCopy);
2290 // Don't emit a copytoreg.
2291 continue;
2292 }
2293
2294 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2295 // which is returned in RAX / RDX.
2296 if (Subtarget.is64Bit()) {
2297 if (ValVT == MVT::x86mmx) {
2298 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2299 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2300 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2301 ValToCopy);
2302 // If we don't have SSE2 available, convert to v4f32 so the generated
2303 // register is legal.
2304 if (!Subtarget.hasSSE2())
2305 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2306 }
2307 }
2308 }
2309
2310 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2311
2312 if (VA.needsCustom()) {
2313 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2314, __extension__ __PRETTY_FUNCTION__))
2314 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2314, __extension__ __PRETTY_FUNCTION__))
;
2315
2316 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2317 Subtarget);
2318
2319 assert(2 == RegsToPass.size() &&(static_cast <bool> (2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? void (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2320, __extension__ __PRETTY_FUNCTION__))
2320 "Expecting two registers after Pass64BitArgInRegs")(static_cast <bool> (2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? void (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2320, __extension__ __PRETTY_FUNCTION__))
;
2321
2322 // Add the second register to the CalleeSaveDisableRegs list.
2323 if (ShouldDisableCalleeSavedRegister)
2324 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2325 } else {
2326 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2327 }
2328
2329 // Add nodes to the DAG and add the values into the RetOps list
2330 for (auto &Reg : RegsToPass) {
2331 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2332 Flag = Chain.getValue(1);
2333 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2334 }
2335 }
2336
2337 // Swift calling convention does not require we copy the sret argument
2338 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2339
2340 // All x86 ABIs require that for returning structs by value we copy
2341 // the sret argument into %rax/%eax (depending on ABI) for the return.
2342 // We saved the argument into a virtual register in the entry block,
2343 // so now we copy the value out and into %rax/%eax.
2344 //
2345 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2346 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2347 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2348 // either case FuncInfo->setSRetReturnReg() will have been called.
2349 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2350 // When we have both sret and another return value, we should use the
2351 // original Chain stored in RetOps[0], instead of the current Chain updated
2352 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2353
2354 // For the case of sret and another return value, we have
2355 // Chain_0 at the function entry
2356 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2357 // If we use Chain_1 in getCopyFromReg, we will have
2358 // Val = getCopyFromReg(Chain_1)
2359 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2360
2361 // getCopyToReg(Chain_0) will be glued together with
2362 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2363 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2364 // Data dependency from Unit B to Unit A due to usage of Val in
2365 // getCopyToReg(Chain_1, Val)
2366 // Chain dependency from Unit A to Unit B
2367
2368 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2369 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2370 getPointerTy(MF.getDataLayout()));
2371
2372 unsigned RetValReg
2373 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2374 X86::RAX : X86::EAX;
2375 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2376 Flag = Chain.getValue(1);
2377
2378 // RAX/EAX now acts like a return value.
2379 RetOps.push_back(
2380 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2381
2382 // Add the returned register to the CalleeSaveDisableRegs list.
2383 if (ShouldDisableCalleeSavedRegister)
2384 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2385 }
2386
2387 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2388 const MCPhysReg *I =
2389 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2390 if (I) {
2391 for (; *I; ++I) {
2392 if (X86::GR64RegClass.contains(*I))
2393 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2394 else
2395 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2395)
;
2396 }
2397 }
2398
2399 RetOps[0] = Chain; // Update chain.
2400
2401 // Add the flag if we have it.
2402 if (Flag.getNode())
2403 RetOps.push_back(Flag);
2404
2405 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2406 if (CallConv == CallingConv::X86_INTR)
2407 opcode = X86ISD::IRET;
2408 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2409}
2410
2411bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2412 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2413 return false;
2414
2415 SDValue TCChain = Chain;
2416 SDNode *Copy = *N->use_begin();
2417 if (Copy->getOpcode() == ISD::CopyToReg) {
2418 // If the copy has a glue operand, we conservatively assume it isn't safe to
2419 // perform a tail call.
2420 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2421 return false;
2422 TCChain = Copy->getOperand(0);
2423 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2424 return false;
2425
2426 bool HasRet = false;
2427 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2428 UI != UE; ++UI) {
2429 if (UI->getOpcode() != X86ISD::RET_FLAG)
2430 return false;
2431 // If we are returning more than one value, we can definitely
2432 // not make a tail call see PR19530
2433 if (UI->getNumOperands() > 4)
2434 return false;
2435 if (UI->getNumOperands() == 4 &&
2436 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2437 return false;
2438 HasRet = true;
2439 }
2440
2441 if (!HasRet)
2442 return false;
2443
2444 Chain = TCChain;
2445 return true;
2446}
2447
2448EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2449 ISD::NodeType ExtendKind) const {
2450 MVT ReturnMVT = MVT::i32;
2451
2452 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2453 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2454 // The ABI does not require i1, i8 or i16 to be extended.
2455 //
2456 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2457 // always extending i8/i16 return values, so keep doing that for now.
2458 // (PR26665).
2459 ReturnMVT = MVT::i8;
2460 }
2461
2462 EVT MinVT = getRegisterType(Context, ReturnMVT);
2463 return VT.bitsLT(MinVT) ? MinVT : VT;
2464}
2465
2466/// Reads two 32 bit registers and creates a 64 bit mask value.
2467/// \param VA The current 32 bit value that need to be assigned.
2468/// \param NextVA The next 32 bit value that need to be assigned.
2469/// \param Root The parent DAG node.
2470/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2471/// glue purposes. In the case the DAG is already using
2472/// physical register instead of virtual, we should glue
2473/// our new SDValue to InFlag SDvalue.
2474/// \return a new SDvalue of size 64bit.
2475static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2476 SDValue &Root, SelectionDAG &DAG,
2477 const SDLoc &Dl, const X86Subtarget &Subtarget,
2478 SDValue *InFlag = nullptr) {
2479 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(static_cast <bool> ((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? void (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2479, __extension__ __PRETTY_FUNCTION__))
;
2480 assert(Subtarget.is32Bit() && "Expecting 32 bit target")(static_cast <bool> (Subtarget.is32Bit() && "Expecting 32 bit target"
) ? void (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2480, __extension__ __PRETTY_FUNCTION__))
;
2481 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2482, __extension__ __PRETTY_FUNCTION__))
2482 "Expecting first location of 64 bit width type")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Expecting first location of 64 bit width type") ? void (0) :
__assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2482, __extension__ __PRETTY_FUNCTION__))
;
2483 assert(NextVA.getValVT() == VA.getValVT() &&(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2484, __extension__ __PRETTY_FUNCTION__))
2484 "The locations should have the same type")(static_cast <bool> (NextVA.getValVT() == VA.getValVT()
&& "The locations should have the same type") ? void
(0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2484, __extension__ __PRETTY_FUNCTION__))
;
2485 assert(VA.isRegLoc() && NextVA.isRegLoc() &&(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2486, __extension__ __PRETTY_FUNCTION__))
2486 "The values should reside in two registers")(static_cast <bool> (VA.isRegLoc() && NextVA.isRegLoc
() && "The values should reside in two registers") ? void
(0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2486, __extension__ __PRETTY_FUNCTION__))
;
2487
2488 SDValue Lo, Hi;
2489 unsigned Reg;
2490 SDValue ArgValueLo, ArgValueHi;
2491
2492 MachineFunction &MF = DAG.getMachineFunction();
2493 const TargetRegisterClass *RC = &X86::GR32RegClass;
2494
2495 // Read a 32 bit value from the registers
2496 if (nullptr == InFlag) {
2497 // When no physical register is present,
2498 // create an intermediate virtual register
2499 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2500 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2501 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2502 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2503 } else {
2504 // When a physical register is available read the value from it and glue
2505 // the reads together.
2506 ArgValueLo =
2507 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2508 *InFlag = ArgValueLo.getValue(2);
2509 ArgValueHi =
2510 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2511 *InFlag = ArgValueHi.getValue(2);
2512 }
2513
2514 // Convert the i32 type into v32i1 type
2515 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2516
2517 // Convert the i32 type into v32i1 type
2518 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2519
2520 // Concatenate the two values together
2521 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2522}
2523
2524/// The function will lower a register of various sizes (8/16/32/64)
2525/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2526/// \returns a DAG node contains the operand after lowering to mask type.
2527static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2528 const EVT &ValLoc, const SDLoc &Dl,
2529 SelectionDAG &DAG) {
2530 SDValue ValReturned = ValArg;
2531
2532 if (ValVT == MVT::v1i1)
2533 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
2534
2535 if (ValVT == MVT::v64i1) {
2536 // In 32 bit machine, this case is handled by getv64i1Argument
2537 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")(static_cast <bool> (ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? void (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2537, __extension__ __PRETTY_FUNCTION__))
;
2538 // In 64 bit machine, There is no need to truncate the value only bitcast
2539 } else {
2540 MVT maskLen;
2541 switch (ValVT.getSimpleVT().SimpleTy) {
2542 case MVT::v8i1:
2543 maskLen = MVT::i8;
2544 break;
2545 case MVT::v16i1:
2546 maskLen = MVT::i16;
2547 break;
2548 case MVT::v32i1:
2549 maskLen = MVT::i32;
2550 break;
2551 default:
2552 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2552)
;
2553 }
2554
2555 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2556 }
2557 return DAG.getBitcast(ValVT, ValReturned);
2558}
2559
2560/// Lower the result values of a call into the
2561/// appropriate copies out of appropriate physical registers.
2562///
2563SDValue X86TargetLowering::LowerCallResult(
2564 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2565 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2566 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2567 uint32_t *RegMask) const {
2568
2569 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2570 // Assign locations to each value returned by this call.
2571 SmallVector<CCValAssign, 16> RVLocs;
2572 bool Is64Bit = Subtarget.is64Bit();
2573 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2574 *DAG.getContext());
2575 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2576
2577 // Copy all of the result registers out of their specified physreg.
2578 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2579 ++I, ++InsIndex) {
2580 CCValAssign &VA = RVLocs[I];
2581 EVT CopyVT = VA.getLocVT();
2582
2583 // In some calling conventions we need to remove the used registers
2584 // from the register mask.
2585 if (RegMask) {
2586 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2587 SubRegs.isValid(); ++SubRegs)
2588 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2589 }
2590
2591 // If this is x86-64, and we disabled SSE, we can't return FP values
2592 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2593 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2594 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2595 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2596 }
2597
2598 // If we prefer to use the value in xmm registers, copy it out as f80 and
2599 // use a truncate to move it from fp stack reg to xmm reg.
2600 bool RoundAfterCopy = false;
2601 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2602 isScalarFPTypeInSSEReg(VA.getValVT())) {
2603 if (!Subtarget.hasX87())
2604 report_fatal_error("X87 register return with X87 disabled");
2605 CopyVT = MVT::f80;
2606 RoundAfterCopy = (CopyVT != VA.getLocVT());
2607 }
2608
2609 SDValue Val;
2610 if (VA.needsCustom()) {
2611 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2612, __extension__ __PRETTY_FUNCTION__))
2612 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2612, __extension__ __PRETTY_FUNCTION__))
;
2613 Val =
2614 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2615 } else {
2616 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2617 .getValue(1);
2618 Val = Chain.getValue(0);
2619 InFlag = Chain.getValue(2);
2620 }
2621
2622 if (RoundAfterCopy)
2623 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2624 // This truncation won't change the value.
2625 DAG.getIntPtrConstant(1, dl));
2626
2627 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2628 if (VA.getValVT().isVector() &&
2629 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2630 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2631 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2632 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2633 } else
2634 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2635 }
2636
2637 InVals.push_back(Val);
2638 }
2639
2640 return Chain;
2641}
2642
2643//===----------------------------------------------------------------------===//
2644// C & StdCall & Fast Calling Convention implementation
2645//===----------------------------------------------------------------------===//
//  The StdCall calling convention is the standard for many Windows API
//  routines. It differs from the C calling convention only slightly: the
//  callee, not the caller, cleans up the stack. Symbols are also decorated
//  in a convention-specific way. It doesn't support any vector arguments.
//  For info on the fast calling convention, see the Fast Calling Convention
//  (tail call) implementation, LowerX86_32FastCCCallTo.
2652
2653/// CallIsStructReturn - Determines whether a call uses struct return
2654/// semantics.
2655enum StructReturnType {
2656 NotStructReturn,
2657 RegStructReturn,
2658 StackStructReturn
2659};
2660static StructReturnType
2661callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2662 if (Outs.empty())
2663 return NotStructReturn;
2664
2665 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2666 if (!Flags.isSRet())
2667 return NotStructReturn;
2668 if (Flags.isInReg() || IsMCU)
2669 return RegStructReturn;
2670 return StackStructReturn;
2671}
2672
2673/// Determines whether a function uses struct return semantics.
2674static StructReturnType
2675argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2676 if (Ins.empty())
2677 return NotStructReturn;
2678
2679 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2680 if (!Flags.isSRet())
2681 return NotStructReturn;
2682 if (Flags.isInReg() || IsMCU)
2683 return RegStructReturn;
2684 return StackStructReturn;
2685}
2686
2687/// Make a copy of an aggregate at address specified by "Src" to address
2688/// "Dst" with size and alignment information specified by the specific
2689/// parameter attribute. The copy will be passed as a byval function parameter.
2690static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2691 SDValue Chain, ISD::ArgFlagsTy Flags,
2692 SelectionDAG &DAG, const SDLoc &dl) {
2693 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2694
2695 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2696 /*isVolatile*/false, /*AlwaysInline=*/true,
2697 /*isTailCall*/false,
2698 MachinePointerInfo(), MachinePointerInfo());
2699}
2700
2701/// Return true if the calling convention is one that we can guarantee TCO for.
2702static bool canGuaranteeTCO(CallingConv::ID CC) {
2703 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2704 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2705 CC == CallingConv::HHVM);
2706}
2707
2708/// Return true if we might ever do TCO for calls with this calling convention.
2709static bool mayTailCallThisCC(CallingConv::ID CC) {
2710 switch (CC) {
2711 // C calling conventions:
2712 case CallingConv::C:
2713 case CallingConv::Win64:
2714 case CallingConv::X86_64_SysV:
2715 // Callee pop conventions:
2716 case CallingConv::X86_ThisCall:
2717 case CallingConv::X86_StdCall:
2718 case CallingConv::X86_VectorCall:
2719 case CallingConv::X86_FastCall:
2720 return true;
2721 default:
2722 return canGuaranteeTCO(CC);
2723 }
2724}
2725
2726/// Return true if the function is being made into a tailcall target by
2727/// changing its ABI.
2728static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2729 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2730}
2731
2732bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2733 auto Attr =
2734 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2735 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2736 return false;
2737
2738 ImmutableCallSite CS(CI);
2739 CallingConv::ID CalleeCC = CS.getCallingConv();
2740 if (!mayTailCallThisCC(CalleeCC))
2741 return false;
2742
2743 return true;
2744}
2745
2746SDValue
2747X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2748 const SmallVectorImpl<ISD::InputArg> &Ins,
2749 const SDLoc &dl, SelectionDAG &DAG,
2750 const CCValAssign &VA,
2751 MachineFrameInfo &MFI, unsigned i) const {
2752 // Create the nodes corresponding to a load from this parameter slot.
2753 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2754 bool AlwaysUseMutable = shouldGuaranteeTCO(
2755 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2756 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2757 EVT ValVT;
2758 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2759
2760 // If value is passed by pointer we have address passed instead of the value
2761 // itself. No need to extend if the mask value and location share the same
2762 // absolute size.
2763 bool ExtendedInMem =
2764 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2765 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2766
2767 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2768 ValVT = VA.getLocVT();
2769 else
2770 ValVT = VA.getValVT();
2771
2772 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2773 // taken by a return address.
2774 int Offset = 0;
2775 if (CallConv == CallingConv::X86_INTR) {
2776 // X86 interrupts may take one or two arguments.
2777 // On the stack there will be no return address as in regular call.
2778 // Offset of last argument need to be set to -4/-8 bytes.
2779 // Where offset of the first argument out of two, should be set to 0 bytes.
2780 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2781 if (Subtarget.is64Bit() && Ins.size() == 2) {
2782 // The stack pointer needs to be realigned for 64 bit handlers with error
2783 // code, so the argument offset changes by 8 bytes.
2784 Offset += 8;
2785 }
2786 }
2787
2788 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2789 // changed with more analysis.
2790 // In case of tail call optimization mark all arguments mutable. Since they
2791 // could be overwritten by lowering of arguments in case of a tail call.
2792 if (Flags.isByVal()) {
2793 unsigned Bytes = Flags.getByValSize();
2794 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2795 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2796 // Adjust SP offset of interrupt parameter.
2797 if (CallConv == CallingConv::X86_INTR) {
2798 MFI.setObjectOffset(FI, Offset);
2799 }
2800 return DAG.getFrameIndex(FI, PtrVT);
2801 }
2802
2803 // This is an argument in memory. We might be able to perform copy elision.
2804 if (Flags.isCopyElisionCandidate()) {
2805 EVT ArgVT = Ins[i].ArgVT;
2806 SDValue PartAddr;
2807 if (Ins[i].PartOffset == 0) {
2808 // If this is a one-part value or the first part of a multi-part value,
2809 // create a stack object for the entire argument value type and return a
2810 // load from our portion of it. This assumes that if the first part of an
2811 // argument is in memory, the rest will also be in memory.
2812 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2813 /*Immutable=*/false);
2814 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2815 return DAG.getLoad(
2816 ValVT, dl, Chain, PartAddr,
2817 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2818 } else {
2819 // This is not the first piece of an argument in memory. See if there is
2820 // already a fixed stack object including this offset. If so, assume it
2821 // was created by the PartOffset == 0 branch above and create a load from
2822 // the appropriate offset into it.
2823 int64_t PartBegin = VA.getLocMemOffset();
2824 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2825 int FI = MFI.getObjectIndexBegin();
2826 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2827 int64_t ObjBegin = MFI.getObjectOffset(FI);
2828 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2829 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2830 break;
2831 }
2832 if (MFI.isFixedObjectIndex(FI)) {
2833 SDValue Addr =
2834 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2835 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2836 return DAG.getLoad(
2837 ValVT, dl, Chain, Addr,
2838 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2839 Ins[i].PartOffset));
2840 }
2841 }
2842 }
2843
2844 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2845 VA.getLocMemOffset(), isImmutable);
2846
2847 // Set SExt or ZExt flag.
2848 if (VA.getLocInfo() == CCValAssign::ZExt) {
2849 MFI.setObjectZExt(FI, true);
2850 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2851 MFI.setObjectSExt(FI, true);
2852 }
2853
2854 // Adjust SP offset of interrupt parameter.
2855 if (CallConv == CallingConv::X86_INTR) {
2856 MFI.setObjectOffset(FI, Offset);
2857 }
2858
2859 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2860 SDValue Val = DAG.getLoad(
2861 ValVT, dl, Chain, FIN,
2862 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2863 return ExtendedInMem
2864 ? (VA.getValVT().isVector()
2865 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
2866 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
2867 : Val;
2868}
2869
2870// FIXME: Get this from tablegen.
2871static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2872 const X86Subtarget &Subtarget) {
2873 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2873, __extension__ __PRETTY_FUNCTION__))
;
2874
2875 if (Subtarget.isCallingConvWin64(CallConv)) {
2876 static const MCPhysReg GPR64ArgRegsWin64[] = {
2877 X86::RCX, X86::RDX, X86::R8, X86::R9
2878 };
2879 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2880 }
2881
2882 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2883 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2884 };
2885 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2886}
2887
2888// FIXME: Get this from tablegen.
2889static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2890 CallingConv::ID CallConv,
2891 const X86Subtarget &Subtarget) {
2892 assert(Subtarget.is64Bit())(static_cast <bool> (Subtarget.is64Bit()) ? void (0) : __assert_fail
("Subtarget.is64Bit()", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2892, __extension__ __PRETTY_FUNCTION__))
;
2893 if (Subtarget.isCallingConvWin64(CallConv)) {
2894 // The XMM registers which might contain var arg parameters are shadowed
2895 // in their paired GPR. So we only need to save the GPR to their home
2896 // slots.
2897 // TODO: __vectorcall will change this.
2898 return None;
2899 }
2900
2901 const Function &F = MF.getFunction();
2902 bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
2903 bool isSoftFloat = Subtarget.useSoftFloat();
2904 assert(!(isSoftFloat && NoImplicitFloatOps) &&(static_cast <bool> (!(isSoftFloat && NoImplicitFloatOps
) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2905, __extension__ __PRETTY_FUNCTION__))
2905 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(isSoftFloat && NoImplicitFloatOps
) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2905, __extension__ __PRETTY_FUNCTION__))
;
2906 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2907 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2908 // registers.
2909 return None;
2910
2911 static const MCPhysReg XMMArgRegs64Bit[] = {
2912 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2913 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2914 };
2915 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2916}
2917
2918#ifndef NDEBUG
2919static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
2920 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2921 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2922 return A.getValNo() < B.getValNo();
2923 });
2924}
2925#endif
2926
2927SDValue X86TargetLowering::LowerFormalArguments(
2928 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2929 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2930 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2931 MachineFunction &MF = DAG.getMachineFunction();
2932 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2933 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
2934
2935 const Function &F = MF.getFunction();
2936 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
2937 F.getName() == "main")
2938 FuncInfo->setForceFramePointer(true);
2939
2940 MachineFrameInfo &MFI = MF.getFrameInfo();
2941 bool Is64Bit = Subtarget.is64Bit();
2942 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2943
2944 assert((static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2946, __extension__ __PRETTY_FUNCTION__))
2945 !(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2946, __extension__ __PRETTY_FUNCTION__))
2946 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2946, __extension__ __PRETTY_FUNCTION__))
;
2947
2948 if (CallConv == CallingConv::X86_INTR) {
2949 bool isLegal = Ins.size() == 1 ||
2950 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2951 (!Is64Bit && Ins[1].VT == MVT::i32)));
2952 if (!isLegal)
2953 report_fatal_error("X86 interrupts may take one or two arguments");
2954 }
2955
2956 // Assign locations to all of the incoming arguments.
2957 SmallVector<CCValAssign, 16> ArgLocs;
2958 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2959
2960 // Allocate shadow area for Win64.
2961 if (IsWin64)
2962 CCInfo.AllocateStack(32, 8);
2963
2964 CCInfo.AnalyzeArguments(Ins, CC_X86);
2965
2966 // In vectorcall calling convention a second pass is required for the HVA
2967 // types.
2968 if (CallingConv::X86_VectorCall == CallConv) {
2969 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
2970 }
2971
2972 // The next loop assumes that the locations are in the same order of the
2973 // input arguments.
2974 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2975, __extension__ __PRETTY_FUNCTION__))
2975 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2975, __extension__ __PRETTY_FUNCTION__))
;
2976
2977 SDValue ArgValue;
2978 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
2979 ++I, ++InsIndex) {
2980 assert(InsIndex < Ins.size() && "Invalid Ins index")(static_cast <bool> (InsIndex < Ins.size() &&
"Invalid Ins index") ? void (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2980, __extension__ __PRETTY_FUNCTION__))
;
2981 CCValAssign &VA = ArgLocs[I];
2982
2983 if (VA.isRegLoc()) {
2984 EVT RegVT = VA.getLocVT();
2985 if (VA.needsCustom()) {
2986 assert((static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2988, __extension__ __PRETTY_FUNCTION__))
2987 VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2988, __extension__ __PRETTY_FUNCTION__))
2988 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 2988, __extension__ __PRETTY_FUNCTION__))
;
2989
2990 // v64i1 values, in regcall calling convention, that are
2991 // compiled to 32 bit arch, are split up into two registers.
2992 ArgValue =
2993 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
2994 } else {
2995 const TargetRegisterClass *RC;
2996 if (RegVT == MVT::i32)
2997 RC = &X86::GR32RegClass;
2998 else if (Is64Bit && RegVT == MVT::i64)
2999 RC = &X86::GR64RegClass;
3000 else if (RegVT == MVT::f32)
3001 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3002 else if (RegVT == MVT::f64)
3003 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3004 else if (RegVT == MVT::f80)
3005 RC = &X86::RFP80RegClass;
3006 else if (RegVT == MVT::f128)
3007 RC = &X86::FR128RegClass;
3008 else if (RegVT.is512BitVector())
3009 RC = &X86::VR512RegClass;
3010 else if (RegVT.is256BitVector())
3011 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3012 else if (RegVT.is128BitVector())
3013 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3014 else if (RegVT == MVT::x86mmx)
3015 RC = &X86::VR64RegClass;
3016 else if (RegVT == MVT::v1i1)
3017 RC = &X86::VK1RegClass;
3018 else if (RegVT == MVT::v8i1)
3019 RC = &X86::VK8RegClass;
3020 else if (RegVT == MVT::v16i1)
3021 RC = &X86::VK16RegClass;
3022 else if (RegVT == MVT::v32i1)
3023 RC = &X86::VK32RegClass;
3024 else if (RegVT == MVT::v64i1)
3025 RC = &X86::VK64RegClass;
3026 else
3027 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3027)
;
3028
3029 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3030 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3031 }
3032
3033 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3034 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3035 // right size.
3036 if (VA.getLocInfo() == CCValAssign::SExt)
3037 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3038 DAG.getValueType(VA.getValVT()));
3039 else if (VA.getLocInfo() == CCValAssign::ZExt)
3040 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3041 DAG.getValueType(VA.getValVT()));
3042 else if (VA.getLocInfo() == CCValAssign::BCvt)
3043 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3044
3045 if (VA.isExtInLoc()) {
3046 // Handle MMX values passed in XMM regs.
3047 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3048 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3049 else if (VA.getValVT().isVector() &&
3050 VA.getValVT().getScalarType() == MVT::i1 &&
3051 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3052 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3053 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3054 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3055 } else
3056 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3057 }
3058 } else {
3059 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3059, __extension__ __PRETTY_FUNCTION__))
;
3060 ArgValue =
3061 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3062 }
3063
3064 // If value is passed via pointer - do a load.
3065 if (VA.getLocInfo() == CCValAssign::Indirect)
3066 ArgValue =
3067 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3068
3069 InVals.push_back(ArgValue);
3070 }
3071
3072 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3073 // Swift calling convention does not require we copy the sret argument
3074 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3075 if (CallConv == CallingConv::Swift)
3076 continue;
3077
3078 // All x86 ABIs require that for returning structs by value we copy the
3079 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3080 // the argument into a virtual register so that we can access it from the
3081 // return points.
3082 if (Ins[I].Flags.isSRet()) {
3083 unsigned Reg = FuncInfo->getSRetReturnReg();
3084 if (!Reg) {
3085 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3086 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3087 FuncInfo->setSRetReturnReg(Reg);
3088 }
3089 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3090 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3091 break;
3092 }
3093 }
3094
3095 unsigned StackSize = CCInfo.getNextStackOffset();
3096 // Align stack specially for tail calls.
3097 if (shouldGuaranteeTCO(CallConv,
3098 MF.getTarget().Options.GuaranteedTailCallOpt))
3099 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3100
3101 // If the function takes variable number of arguments, make a frame index for
3102 // the start of the first vararg value... for expansion of llvm.va_start. We
3103 // can skip this if there are no va_start calls.
3104 if (MFI.hasVAStart() &&
3105 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3106 CallConv != CallingConv::X86_ThisCall))) {
3107 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3108 }
3109
3110 // Figure out if XMM registers are in use.
3111 assert(!(Subtarget.useSoftFloat() &&(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3113, __extension__ __PRETTY_FUNCTION__))
3112 F.hasFnAttribute(Attribute::NoImplicitFloat)) &&(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3113, __extension__ __PRETTY_FUNCTION__))
3113 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(Subtarget.useSoftFloat() &&
F.hasFnAttribute(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(Subtarget.useSoftFloat() && F.hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3113, __extension__ __PRETTY_FUNCTION__))
;
3114
3115 // 64-bit calling conventions support varargs and register parameters, so we
3116 // have to do extra work to spill them in the prologue.
3117 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3118 // Find the first unallocated argument registers.
3119 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3120 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3121 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3122 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3123 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3124, __extension__ __PRETTY_FUNCTION__))
3124 "SSE register cannot be used when SSE is disabled!")(static_cast <bool> (!(NumXMMRegs && !Subtarget
.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? void (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3124, __extension__ __PRETTY_FUNCTION__))
;
3125
3126 // Gather all the live in physical registers.
3127 SmallVector<SDValue, 6> LiveGPRs;
3128 SmallVector<SDValue, 8> LiveXMMRegs;
3129 SDValue ALVal;
3130 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3131 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3132 LiveGPRs.push_back(
3133 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3134 }
3135 if (!ArgXMMs.empty()) {
3136 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3137 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3138 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3139 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3140 LiveXMMRegs.push_back(
3141 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3142 }
3143 }
3144
3145 if (IsWin64) {
3146 // Get to the caller-allocated home save location. Add 8 to account
3147 // for the return address.
3148 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3149 FuncInfo->setRegSaveFrameIndex(
3150 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3151 // Fixup to set vararg frame on shadow area (4 x i64).
3152 if (NumIntRegs < 4)
3153 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3154 } else {
3155 // For X86-64, if there are vararg parameters that are passed via
3156 // registers, then we must store them to their spots on the stack so
3157 // they may be loaded by dereferencing the result of va_next.
3158 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3159 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3160 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3161 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3162 }
3163
3164 // Store the integer parameter registers.
3165 SmallVector<SDValue, 8> MemOps;
3166 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3167 getPointerTy(DAG.getDataLayout()));
3168 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3169 for (SDValue Val : LiveGPRs) {
3170 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3171 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3172 SDValue Store =
3173 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3174 MachinePointerInfo::getFixedStack(
3175 DAG.getMachineFunction(),
3176 FuncInfo->getRegSaveFrameIndex(), Offset));
3177 MemOps.push_back(Store);
3178 Offset += 8;
3179 }
3180
3181 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3182 // Now store the XMM (fp + vector) parameter registers.
3183 SmallVector<SDValue, 12> SaveXMMOps;
3184 SaveXMMOps.push_back(Chain);
3185 SaveXMMOps.push_back(ALVal);
3186 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3187 FuncInfo->getRegSaveFrameIndex(), dl));
3188 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3189 FuncInfo->getVarArgsFPOffset(), dl));
3190 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3191 LiveXMMRegs.end());
3192 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3193 MVT::Other, SaveXMMOps));
3194 }
3195
3196 if (!MemOps.empty())
3197 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3198 }
3199
3200 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3201 // Find the largest legal vector type.
3202 MVT VecVT = MVT::Other;
3203 // FIXME: Only some x86_32 calling conventions support AVX512.
3204 if (Subtarget.hasAVX512() &&
3205 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3206 CallConv == CallingConv::Intel_OCL_BI)))
3207 VecVT = MVT::v16f32;
3208 else if (Subtarget.hasAVX())
3209 VecVT = MVT::v8f32;
3210 else if (Subtarget.hasSSE2())
3211 VecVT = MVT::v4f32;
3212
3213 // We forward some GPRs and some vector types.
3214 SmallVector<MVT, 2> RegParmTypes;
3215 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3216 RegParmTypes.push_back(IntVT);
3217 if (VecVT != MVT::Other)
3218 RegParmTypes.push_back(VecVT);
3219
3220 // Compute the set of forwarded registers. The rest are scratch.
3221 SmallVectorImpl<ForwardedRegister> &Forwards =
3222 FuncInfo->getForwardedMustTailRegParms();
3223 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3224
3225 // Conservatively forward AL on x86_64, since it might be used for varargs.
3226 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3227 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3228 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3229 }
3230
3231 // Copy all forwards from physical to virtual registers.
3232 for (ForwardedRegister &F : Forwards) {
3233 // FIXME: Can we use a less constrained schedule?
3234 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3235 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3236 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3237 }
3238 }
3239
3240 // Some CCs need callee pop.
3241 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3242 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3243 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3244 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3245 // X86 interrupts must pop the error code (and the alignment padding) if
3246 // present.
3247 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3248 } else {
3249 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3250 // If this is an sret function, the return should pop the hidden pointer.
3251 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3252 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3253 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3254 FuncInfo->setBytesToPopOnReturn(4);
3255 }
3256
3257 if (!Is64Bit) {
3258 // RegSaveFrameIndex is X86-64 only.
3259 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3260 if (CallConv == CallingConv::X86_FastCall ||
3261 CallConv == CallingConv::X86_ThisCall)
3262 // fastcc functions can't have varargs.
3263 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3264 }
3265
3266 FuncInfo->setArgumentStackSize(StackSize);
3267
3268 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3269 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3270 if (Personality == EHPersonality::CoreCLR) {
3271 assert(Is64Bit)(static_cast <bool> (Is64Bit) ? void (0) : __assert_fail
("Is64Bit", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3271, __extension__ __PRETTY_FUNCTION__))
;
3272 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3273 // that we'd prefer this slot be allocated towards the bottom of the frame
3274 // (i.e. near the stack pointer after allocating the frame). Every
3275 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3276 // offset from the bottom of this and each funclet's frame must be the
3277 // same, so the size of funclets' (mostly empty) frames is dictated by
3278 // how far this slot is from the bottom (since they allocate just enough
3279 // space to accommodate holding this slot at the correct offset).
3280 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3281 EHInfo->PSPSymFrameIdx = PSPSymFI;
3282 }
3283 }
3284
3285 if (CallConv == CallingConv::X86_RegCall ||
3286 F.hasFnAttribute("no_caller_saved_registers")) {
3287 MachineRegisterInfo &MRI = MF.getRegInfo();
3288 for (std::pair<unsigned, unsigned> Pair : MRI.liveins())
3289 MRI.disableCalleeSavedRegister(Pair.first);
3290 }
3291
3292 return Chain;
3293}
3294
3295SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3296 SDValue Arg, const SDLoc &dl,
3297 SelectionDAG &DAG,
3298 const CCValAssign &VA,
3299 ISD::ArgFlagsTy Flags) const {
3300 unsigned LocMemOffset = VA.getLocMemOffset();
3301 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3302 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3303 StackPtr, PtrOff);
3304 if (Flags.isByVal())
3305 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3306
3307 return DAG.getStore(
3308 Chain, dl, Arg, PtrOff,
3309 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3310}
3311
3312/// Emit a load of return address if tail call
3313/// optimization is performed and it is required.
3314SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3315 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3316 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3317 // Adjust the Return address stack slot.
3318 EVT VT = getPointerTy(DAG.getDataLayout());
3319 OutRetAddr = getReturnAddressFrameIndex(DAG);
3320
3321 // Load the "old" Return address.
3322 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3323 return SDValue(OutRetAddr.getNode(), 1);
3324}
3325
3326/// Emit a store of the return address if tail call
3327/// optimization is performed and it is required (FPDiff!=0).
3328static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3329 SDValue Chain, SDValue RetAddrFrIdx,
3330 EVT PtrVT, unsigned SlotSize,
3331 int FPDiff, const SDLoc &dl) {
3332 // Store the return address to the appropriate stack slot.
3333 if (!FPDiff) return Chain;
3334 // Calculate the new stack slot for the return address.
3335 int NewReturnAddrFI =
3336 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3337 false);
3338 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3339 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3340 MachinePointerInfo::getFixedStack(
3341 DAG.getMachineFunction(), NewReturnAddrFI));
3342 return Chain;
3343}
3344
3345/// Returns a vector_shuffle mask for an movs{s|d}, movd
3346/// operation of specified width.
3347static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3348 SDValue V2) {
3349 unsigned NumElems = VT.getVectorNumElements();
3350 SmallVector<int, 8> Mask;
3351 Mask.push_back(NumElems);
3352 for (unsigned i = 1; i != NumElems; ++i)
3353 Mask.push_back(i);
3354 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3355}
3356
/// Lower an outgoing call described by \p CLI into SelectionDAG nodes.
///
/// The visible steps are: decide whether the call can be emitted as a tail
/// call or sibcall, assign argument locations with CC_X86, open the call
/// sequence, copy register arguments and store memory arguments, rewrite the
/// callee operand, attach the call-preserved register mask, and emit either
/// X86ISD::TC_RETURN (tail call) or X86ISD::CALL followed by CALLSEQ_END.
///
/// \param CLI    Call description; CLI.IsTailCall is updated in place through
///               the isTailCall reference.
/// \param InVals Passed through to LowerCallResult for non-tail calls.
/// \returns the TC_RETURN node for tail calls, otherwise the result of
///          LowerCallResult.
3357SDValue
3358X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3359 SmallVectorImpl<SDValue> &InVals) const {
3360 SelectionDAG &DAG = CLI.DAG;
3361 SDLoc &dl = CLI.DL;
3362 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3363 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3364 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3365 SDValue Chain = CLI.Chain;
3366 SDValue Callee = CLI.Callee;
3367 CallingConv::ID CallConv = CLI.CallConv;
3368 bool &isTailCall = CLI.IsTailCall;
3369 bool isVarArg = CLI.IsVarArg;
3370
3371 MachineFunction &MF = DAG.getMachineFunction();
3372 bool Is64Bit = Subtarget.is64Bit();
3373 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3374 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3375 bool IsSibcall = false;
3376 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3377 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
3378 const auto *CI = dyn_cast_or_null<CallInst>(CLI.CS.getInstruction());
3379 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
// HasNCSR is set when either the call site or the called function carries the
// "no_caller_saved_registers" attribute; it selects the register mask below.
3380 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3381 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3382
3383 if (CallConv == CallingConv::X86_INTR)
3384 report_fatal_error("X86 interrupts may not be called directly");
3385
3386 if (Attr.getValueAsString() == "true")
3387 isTailCall = false;
3388
3389 if (Subtarget.isPICStyleGOT() &&
3390 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3391 // If we are using a GOT, disable tail calls to external symbols with
3392 // default visibility. Tail calling such a symbol requires using a GOT
3393 // relocation, which forces early binding of the symbol. This breaks code
3394 // that require lazy function symbol resolution. Using musttail or
3395 // GuaranteedTailCallOpt will override this.
3396 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3397 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3398 G->getGlobal()->hasDefaultVisibility()))
3399 isTailCall = false;
3400 }
3401
3402 bool IsMustTail = CLI.CS && CLI.CS.isMustTailCall();
3403 if (IsMustTail) {
3404 // Force this to be a tail call. The verifier rules are enough to ensure
3405 // that we can lower this successfully without moving the return address
3406 // around.
3407 isTailCall = true;
3408 } else if (isTailCall) {
3409 // Check if it's really possible to do a tail call.
3410 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3411 isVarArg, SR != NotStructReturn,
3412 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3413 Outs, OutVals, Ins, DAG);
3414
3415 // Sibcalls are automatically detected tailcalls which do not require
3416 // ABI changes.
3417 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3418 IsSibcall = true;
3419
3420 if (isTailCall)
3421 ++NumTailCalls;
3422 }
3423
3424 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3425, __extension__ __PRETTY_FUNCTION__))
3425 "Var args not supported with calling convention fastcc, ghc or hipe")(static_cast <bool> (!(isVarArg && canGuaranteeTCO
(CallConv)) && "Var args not supported with calling convention fastcc, ghc or hipe"
) ? void (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3425, __extension__ __PRETTY_FUNCTION__))
;
3426
3427 // Analyze operands of the call, assigning locations to each operand.
3428 SmallVector<CCValAssign, 16> ArgLocs;
3429 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3430
3431 // Allocate shadow area for Win64.
3432 if (IsWin64)
3433 CCInfo.AllocateStack(32, 8);
3434
3435 CCInfo.AnalyzeArguments(Outs, CC_X86);
3436
3437 // In vectorcall calling convention a second pass is required for the HVA
3438 // types.
3439 if (CallingConv::X86_VectorCall == CallConv) {
3440 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3441 }
3442
3443 // Get a count of how many bytes are to be pushed on the stack.
3444 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3445 if (IsSibcall)
3446 // This is a sibcall. The memory operands are available in caller's
3447 // own caller's stack.
3448 NumBytes = 0;
3449 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3450 canGuaranteeTCO(CallConv))
3451 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3452
3453 int FPDiff = 0;
3454 if (isTailCall && !IsSibcall && !IsMustTail) {
3455 // Lower arguments at fp - stackoffset + fpdiff.
3456 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3457
3458 FPDiff = NumBytesCallerPushed - NumBytes;
3459
3460 // Set the delta of movement of the returnaddr stackslot.
3461 // But only set if FPDiff is less than the previously recorded delta.
3462 if (FPDiff < X86Info->getTCReturnAddrDelta())
3463 X86Info->setTCReturnAddrDelta(FPDiff);
3464 }
3465
3466 unsigned NumBytesToPush = NumBytes;
3467 unsigned NumBytesToPop = NumBytes;
3468
3469 // If we have an inalloca argument, all stack space has already been allocated
3470 // for us and be right at the top of the stack. We don't support multiple
3471 // arguments passed in memory when using inalloca.
3472 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3473 NumBytesToPush = 0;
3474 if (!ArgLocs.back().isMemLoc())
3475 report_fatal_error("cannot use inalloca attribute on a register "
3476 "parameter");
3477 if (ArgLocs.back().getLocMemOffset() != 0)
3478 report_fatal_error("any parameter with the inalloca attribute must be "
3479 "the only memory argument");
3480 }
3481
3482 if (!IsSibcall)
3483 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3484 NumBytes - NumBytesToPush, dl);
3485
3486 SDValue RetAddrFrIdx;
3487 // Load return address for tail calls.
3488 if (isTailCall && FPDiff)
3489 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3490 Is64Bit, FPDiff, dl);
3491
3492 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3493 SmallVector<SDValue, 8> MemOpChains;
3494 SDValue StackPtr;
3495
3496 // The next loop assumes that the locations are in the same order of the
3497 // input arguments.
3498 assert(isSortedByValueNo(ArgLocs) &&(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3499, __extension__ __PRETTY_FUNCTION__))
3499 "Argument Location list must be sorted before lowering")(static_cast <bool> (isSortedByValueNo(ArgLocs) &&
"Argument Location list must be sorted before lowering") ? void
(0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3499, __extension__ __PRETTY_FUNCTION__))
;
3500
3501 // Walk the register/memloc assignments, inserting copies/loads. In the case
3502 // of tail call optimization arguments are handled later.
3503 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
// I indexes ArgLocs while OutIndex indexes Outs/OutVals; they diverge when a
// custom location consumes two ArgLocs entries (see the ++I below).
3504 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3505 ++I, ++OutIndex) {
3506 assert(OutIndex < Outs.size() && "Invalid Out index")(static_cast <bool> (OutIndex < Outs.size() &&
"Invalid Out index") ? void (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3506, __extension__ __PRETTY_FUNCTION__))
;
3507 // Skip inalloca arguments, they have already been written.
3508 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3509 if (Flags.isInAlloca())
3510 continue;
3511
3512 CCValAssign &VA = ArgLocs[I];
3513 EVT RegVT = VA.getLocVT();
3514 SDValue Arg = OutVals[OutIndex];
3515 bool isByVal = Flags.isByVal();
3516
3517 // Promote the value if needed.
3518 switch (VA.getLocInfo()) {
3519 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3519)
;
3520 case CCValAssign::Full: break;
3521 case CCValAssign::SExt:
3522 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3523 break;
3524 case CCValAssign::ZExt:
3525 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3526 break;
3527 case CCValAssign::AExt:
3528 if (Arg.getValueType().isVector() &&
3529 Arg.getValueType().getVectorElementType() == MVT::i1)
3530 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3531 else if (RegVT.is128BitVector()) {
3532 // Special case: passing MMX values in XMM registers.
3533 Arg = DAG.getBitcast(MVT::i64, Arg);
3534 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3535 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3536 } else
3537 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3538 break;
3539 case CCValAssign::BCvt:
3540 Arg = DAG.getBitcast(RegVT, Arg);
3541 break;
3542 case CCValAssign::Indirect: {
3543 // Store the argument.
3544 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3545 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3546 Chain = DAG.getStore(
3547 Chain, dl, Arg, SpillSlot,
3548 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3549 Arg = SpillSlot;
3550 break;
3551 }
3552 }
3553
3554 if (VA.needsCustom()) {
3555 assert(VA.getValVT() == MVT::v64i1 &&(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3556, __extension__ __PRETTY_FUNCTION__))
3556 "Currently the only custom case is when we split v64i1 to 2 regs")(static_cast <bool> (VA.getValVT() == MVT::v64i1 &&
"Currently the only custom case is when we split v64i1 to 2 regs"
) ? void (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3556, __extension__ __PRETTY_FUNCTION__))
;
3557 // Split v64i1 value into two registers
// ArgLocs[++I] consumes the following location entry as well; OutIndex is
// not advanced here, so it keeps counting one step per outgoing argument.
3558 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3559 Subtarget);
3560 } else if (VA.isRegLoc()) {
3561 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3562 if (isVarArg && IsWin64) {
3563 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3564 // shadow reg if callee is a varargs function.
3565 unsigned ShadowReg = 0;
3566 switch (VA.getLocReg()) {
3567 case X86::XMM0: ShadowReg = X86::RCX; break;
3568 case X86::XMM1: ShadowReg = X86::RDX; break;
3569 case X86::XMM2: ShadowReg = X86::R8; break;
3570 case X86::XMM3: ShadowReg = X86::R9; break;
3571 }
3572 if (ShadowReg)
3573 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3574 }
3575 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3576 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3576, __extension__ __PRETTY_FUNCTION__))
;
3577 if (!StackPtr.getNode())
3578 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3579 getPointerTy(DAG.getDataLayout()));
3580 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3581 dl, DAG, VA, Flags));
3582 }
3583 }
3584
3585 if (!MemOpChains.empty())
3586 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3587
3588 if (Subtarget.isPICStyleGOT()) {
3589 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3590 // GOT pointer.
3591 if (!isTailCall) {
3592 RegsToPass.push_back(std::make_pair(
3593 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3594 getPointerTy(DAG.getDataLayout()))));
3595 } else {
3596 // If we are tail calling and generating PIC/GOT style code load the
3597 // address of the callee into ECX. The value in ecx is used as target of
3598 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3599 // for tail calls on PIC/GOT architectures. Normally we would just put the
3600 // address of GOT into ebx and then call target@PLT. But for tail calls
3601 // ebx would be restored (since ebx is callee saved) before jumping to the
3602 // target@PLT.
3603
3604 // Note: The actual moving to ECX is done further down.
3605 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3606 if (G && !G->getGlobal()->hasLocalLinkage() &&
3607 G->getGlobal()->hasDefaultVisibility())
3608 Callee = LowerGlobalAddress(Callee, DAG);
3609 else if (isa<ExternalSymbolSDNode>(Callee))
3610 Callee = LowerExternalSymbol(Callee, DAG);
3611 }
3612 }
3613
3614 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3615 // From AMD64 ABI document:
3616 // For calls that may call functions that use varargs or stdargs
3617 // (prototype-less calls or calls to functions containing ellipsis (...) in
3618 // the declaration) %al is used as hidden argument to specify the number
3619 // of SSE registers used. The contents of %al do not need to match exactly
3620 // the number of registers, but must be an ubound on the number of SSE
3621 // registers used and is in the range 0 - 8 inclusive.
3622
3623 // Count the number of XMM registers allocated.
3624 static const MCPhysReg XMMArgRegs[] = {
3625 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3626 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3627 };
3628 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3629 assert((Subtarget.hasSSE1() || !NumXMMRegs)(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3630, __extension__ __PRETTY_FUNCTION__))
3630 && "SSE registers cannot be used when SSE is disabled")(static_cast <bool> ((Subtarget.hasSSE1() || !NumXMMRegs
) && "SSE registers cannot be used when SSE is disabled"
) ? void (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3630, __extension__ __PRETTY_FUNCTION__))
;
3631
3632 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3633 DAG.getConstant(NumXMMRegs, dl,
3634 MVT::i8)));
3635 }
3636
// For variadic musttail calls, re-pass every register recorded in
// getForwardedMustTailRegParms() from its virtual register copy.
3637 if (isVarArg && IsMustTail) {
3638 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3639 for (const auto &F : Forwards) {
3640 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3641 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3642 }
3643 }
3644
3645 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3646 // don't need this because the eligibility check rejects calls that require
3647 // shuffling arguments passed in memory.
3648 if (!IsSibcall && isTailCall) {
3649 // Force all the incoming stack arguments to be loaded from the stack
3650 // before any new outgoing arguments are stored to the stack, because the
3651 // outgoing stack slots may alias the incoming argument stack slots, and
3652 // the alias isn't otherwise explicit. This is slightly more conservative
3653 // than necessary, because it means that each store effectively depends
3654 // on every argument instead of just those arguments it would clobber.
3655 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3656
3657 SmallVector<SDValue, 8> MemOpChains2;
3658 SDValue FIN;
3659 int FI = 0;
3660 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3661 ++I, ++OutsIndex) {
3662 CCValAssign &VA = ArgLocs[I];
3663
3664 if (VA.isRegLoc()) {
3665 if (VA.needsCustom()) {
3666 assert((CallConv == CallingConv::X86_RegCall) &&(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3667, __extension__ __PRETTY_FUNCTION__))
3667 "Expecting custom case only in regcall calling convention")(static_cast <bool> ((CallConv == CallingConv::X86_RegCall
) && "Expecting custom case only in regcall calling convention"
) ? void (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3667, __extension__ __PRETTY_FUNCTION__))
;
3668 // This means that we are in special case where one argument was
3669 // passed through two register locations - Skip the next location
3670 ++I;
3671 }
3672
3673 continue;
3674 }
3675
3676 assert(VA.isMemLoc())(static_cast <bool> (VA.isMemLoc()) ? void (0) : __assert_fail
("VA.isMemLoc()", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3676, __extension__ __PRETTY_FUNCTION__))
;
3677 SDValue Arg = OutVals[OutsIndex];
3678 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3679 // Skip inalloca arguments. They don't require any work.
3680 if (Flags.isInAlloca())
3681 continue;
3682 // Create frame index.
3683 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3684 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3685 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3686 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3687
3688 if (Flags.isByVal()) {
3689 // Copy relative to framepointer.
3690 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3691 if (!StackPtr.getNode())
3692 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3693 getPointerTy(DAG.getDataLayout()));
3694 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3695 StackPtr, Source);
3696
3697 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3698 ArgChain,
3699 Flags, DAG, dl));
3700 } else {
3701 // Store relative to framepointer.
3702 MemOpChains2.push_back(DAG.getStore(
3703 ArgChain, dl, Arg, FIN,
3704 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3705 }
3706 }
3707
3708 if (!MemOpChains2.empty())
3709 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3710
3711 // Store the return address to the appropriate stack slot.
3712 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3713 getPointerTy(DAG.getDataLayout()),
3714 RegInfo->getSlotSize(), FPDiff, dl);
3715 }
3716
3717 // Build a sequence of copy-to-reg nodes chained together with token chain
3718 // and flag operands which copy the outgoing args into registers.
3719 SDValue InFlag;
3720 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3721 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3722 RegsToPass[i].second, InFlag);
3723 InFlag = Chain.getValue(1);
3724 }
3725
3726 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3727 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")(static_cast <bool> (Is64Bit && "Large code model is only legal in 64-bit mode."
) ? void (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3727, __extension__ __PRETTY_FUNCTION__))
;
3728 // In the 64-bit large code model, we have to make all calls
3729 // through a register, since the call instruction's 32-bit
3730 // pc-relative offset may not be large enough to hold the whole
3731 // address.
3732 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3733 // If the callee is a GlobalAddress node (quite common, every direct call
3734 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3735 // it.
3736 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3737
3738 // We should use extra load for direct calls to dllimported functions in
3739 // non-JIT mode.
3740 const GlobalValue *GV = G->getGlobal();
3741 if (!GV->hasDLLImportStorageClass()) {
3742 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3743
3744 Callee = DAG.getTargetGlobalAddress(
3745 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3746
3747 if (OpFlags == X86II::MO_GOTPCREL) {
3748 // Add a wrapper.
3749 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3750 getPointerTy(DAG.getDataLayout()), Callee);
3751 // Add extra indirection
3752 Callee = DAG.getLoad(
3753 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3754 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3755 }
3756 }
3757 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3758 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
3759 unsigned char OpFlags =
3760 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3761
3762 Callee = DAG.getTargetExternalSymbol(
3763 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3764 } else if (Subtarget.isTarget64BitILP32() &&
3765 Callee->getValueType(0) == MVT::i32) {
3766 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3767 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3768 }
3769
3770 // Returns a chain & a flag for retval copy to use.
3771 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3772 SmallVector<SDValue, 8> Ops;
3773
// Non-sibcall tail calls close the call sequence here, before the
// TC_RETURN node is built.
3774 if (!IsSibcall && isTailCall) {
3775 Chain = DAG.getCALLSEQ_END(Chain,
3776 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3777 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3778 InFlag = Chain.getValue(1);
3779 }
3780
3781 Ops.push_back(Chain);
3782 Ops.push_back(Callee);
3783
3784 if (isTailCall)
3785 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3786
3787 // Add argument registers to the end of the list so that they are known live
3788 // into the call.
3789 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3790 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3791 RegsToPass[i].second.getValueType()));
3792
3793 // Add a register mask operand representing the call-preserved registers.
3794 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3795 // set X86_INTR calling convention because it has the same CSR mask
3796 // (same preserved registers).
3797 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3798 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3799 assert(Mask && "Missing call preserved mask for calling convention")(static_cast <bool> (Mask && "Missing call preserved mask for calling convention"
) ? void (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 3799, __extension__ __PRETTY_FUNCTION__))
;
3800
3801 // If this is an invoke in a 32-bit function using a funclet-based
3802 // personality, assume the function clobbers all registers. If an exception
3803 // is thrown, the runtime will not restore CSRs.
3804 // FIXME: Model this more precisely so that we can register allocate across
3805 // the normal edge and spill and fill across the exceptional edge.
3806 if (!Is64Bit && CLI.CS && CLI.CS.isInvoke()) {
3807 const Function &CallerFn = MF.getFunction();
3808 EHPersonality Pers =
3809 CallerFn.hasPersonalityFn()
3810 ? classifyEHPersonality(CallerFn.getPersonalityFn())
3811 : EHPersonality::Unknown;
3812 if (isFuncletEHPersonality(Pers))
3813 Mask = RegInfo->getNoPreservedMask();
3814 }
3815
3816 // Define a new register mask from the existing mask.
// RegMask stays nullptr unless the branch below allocates a modified copy;
// that value is what gets forwarded to LowerCallResult at the end.
3817 uint32_t *RegMask = nullptr;
3818
3819 // In some calling conventions we need to remove the used physical registers
3820 // from the reg mask.
3821 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
3822 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3823
3824 // Allocate a new Reg Mask and copy Mask.
3825 RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
3826 unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
3827 memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
3828
3829 // Make sure all sub registers of the argument registers are reset
3830 // in the RegMask.
3831 for (auto const &RegPair : RegsToPass)
3832 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3833 SubRegs.isValid(); ++SubRegs)
3834 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3835
3836 // Create the RegMask Operand according to our updated mask.
3837 Ops.push_back(DAG.getRegisterMask(RegMask));
3838 } else {
3839 // Create the RegMask Operand according to the static mask.
3840 Ops.push_back(DAG.getRegisterMask(Mask));
3841 }
3842
3843 if (InFlag.getNode())
3844 Ops.push_back(InFlag);
3845
3846 if (isTailCall) {
3847 // We used to do:
3848 //// If this is the first return lowered for this function, add the regs
3849 //// to the liveout set for the function.
3850 // This isn't right, although it's probably harmless on x86; liveouts
3851 // should be computed from returns not tail calls. Consider a void
3852 // function making a tail call to a function returning int.
3853 MF.getFrameInfo().setHasTailCall();
3854 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3855 }
3856
3857 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3858 InFlag = Chain.getValue(1);
3859
3860 // Create the CALLSEQ_END node.
3861 unsigned NumBytesForCalleeToPop;
3862 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3863 DAG.getTarget().Options.GuaranteedTailCallOpt))
3864 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3865 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3866 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3867 SR == StackStructReturn)
3868 // If this is a call to a struct-return function, the callee
3869 // pops the hidden struct pointer, so we have to push it back.
3870 // This is common for Darwin/X86, Linux & Mingw32 targets.
3871 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3872 NumBytesForCalleeToPop = 4;
3873 else
3874 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3875
3876 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3877 // No need to reset the stack after the call if the call doesn't return. To
3878 // make the MI verify, we'll pretend the callee does it for us.
3879 NumBytesForCalleeToPop = NumBytes;
3880 }
3881
3882 // Returns a flag for retval copy to use.
3883 if (!IsSibcall) {
3884 Chain = DAG.getCALLSEQ_END(Chain,
3885 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3886 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3887 true),
3888 InFlag, dl);
3889 InFlag = Chain.getValue(1);
3890 }
3891
3892 // Handle result values, copying them out of physregs into vregs that we
3893 // return.
3894 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
3895 InVals, RegMask);
3896}
3897
3898//===----------------------------------------------------------------------===//
3899// Fast Calling Convention (tail call) implementation
3900//===----------------------------------------------------------------------===//
3901
3902// Like the stdcall convention, the callee cleans up the arguments, except that
3903// ECX is reserved for storing the tail called function address. Only 2 registers
3904// are free for argument passing (inreg). Tail call optimization is performed
3905// provided:
3906// * tailcallopt is enabled
3907// * caller/callee are fastcc
3908// On X86_64 architecture with GOT-style position independent code only local
3909// (within module) calls are supported at the moment.
3910// To keep the stack aligned according to platform abi the function
3911// GetAlignedArgumentStackSize ensures that argument delta is always multiples
3912// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
3913// If a tail called function callee has more arguments than the caller the
3914// caller needs to make sure that there is room to move the RETADDR to. This is
3915// achieved by reserving an area the size of the argument delta right after the
3916// original RETADDR, but before the saved framepointer or the spilled registers
3917// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
3918// stack layout:
3919// arg1
3920// arg2
3921// RETADDR
3922// [ new RETADDR
3923// move area ]
3924// (possible EBP)
3925// ESI
3926// EDI
3927// local1 ..
3928
3929/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
3930/// requirement.
3931unsigned
3932X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3933 SelectionDAG& DAG) const {
3934 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3935 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3936 unsigned StackAlignment = TFI.getStackAlignment();
3937 uint64_t AlignMask = StackAlignment - 1;
3938 int64_t Offset = StackSize;
3939 unsigned SlotSize = RegInfo->getSlotSize();
3940 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3941 // Number smaller than 12 so just add the difference.
3942 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3943 } else {
3944 // Mask out lower bits, add stackalignment once plus the 12 bytes.
3945 Offset = ((~AlignMask) & Offset) + StackAlignment +
3946 (StackAlignment-SlotSize);
3947 }
3948 return Offset;
3949}
3950
3951/// Return true if the given stack call argument is already available in the
3952/// same position (relatively) of the caller's incoming argument stack.
3953static
3954bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3955 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
3956 const X86InstrInfo *TII, const CCValAssign &VA) {
3957 unsigned Bytes = Arg.getValueSizeInBits() / 8;
3958
3959 for (;;) {
3960 // Look through nodes that don't alter the bits of the incoming value.
3961 unsigned Op = Arg.getOpcode();
3962 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
3963 Arg = Arg.getOperand(0);
3964 continue;
3965 }
3966 if (Op == ISD::TRUNCATE) {
3967 const SDValue &TruncInput = Arg.getOperand(0);
3968 if (TruncInput.getOpcode() == ISD::AssertZext &&
3969 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
3970 Arg.getValueType()) {
3971 Arg = TruncInput.getOperand(0);
3972 continue;
3973 }
3974 }
3975 break;
3976 }
3977
3978 int FI = INT_MAX2147483647;
3979 if (Arg.getOpcode() == ISD::CopyFromReg) {
3980 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3981 if (!TargetRegisterInfo::isVirtualRegister(VR))
3982 return false;
3983 MachineInstr *Def = MRI->getVRegDef(VR);
3984 if (!Def)
3985 return false;
3986 if (!Flags.isByVal()) {
3987 if (!TII->isLoadFromStackSlot(*Def, FI))
3988 return false;
3989 } else {
3990 unsigned Opcode = Def->getOpcode();
3991 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
3992 Opcode == X86::LEA64_32r) &&
3993 Def->getOperand(1).isFI()) {
3994 FI = Def->getOperand(1).getIndex();
3995 Bytes = Flags.getByValSize();
3996 } else
3997 return false;
3998 }
3999 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4000 if (Flags.isByVal())
4001 // ByVal argument is passed in as a pointer but it's now being
4002 // dereferenced. e.g.
4003 // define @foo(%struct.X* %A) {
4004 // tail call @bar(%struct.X* byval %A)
4005 // }
4006 return false;
4007 SDValue Ptr = Ld->getBasePtr();
4008 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4009 if (!FINode)
4010 return false;
4011 FI = FINode->getIndex();
4012 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4013 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4014 FI = FINode->getIndex();
4015 Bytes = Flags.getByValSize();
4016 } else
4017 return false;
4018
4019 assert(FI != INT_MAX)(static_cast <bool> (FI != 2147483647) ? void (0) : __assert_fail
("FI != INT_MAX", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4019, __extension__ __PRETTY_FUNCTION__))
;
4020 if (!MFI.isFixedObjectIndex(FI))
4021 return false;
4022
4023 if (Offset != MFI.getObjectOffset(FI))
4024 return false;
4025
4026 // If this is not byval, check that the argument stack object is immutable.
4027 // inalloca and argument copy elision can create mutable argument stack
4028 // objects. Byval objects can be mutated, but a byval call intends to pass the
4029 // mutated memory.
4030 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4031 return false;
4032
4033 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4034 // If the argument location is wider than the argument type, check that any
4035 // extension flags match.
4036 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4037 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4038 return false;
4039 }
4040 }
4041
4042 return Bytes == MFI.getObjectSize(FI);
4043}
4044
4045/// Check whether the call is eligible for tail call optimization. Targets
4046/// that want to do tail call optimization should implement this function.
4047bool X86TargetLowering::IsEligibleForTailCallOptimization(
4048 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4049 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4050 const SmallVectorImpl<ISD::OutputArg> &Outs,
4051 const SmallVectorImpl<SDValue> &OutVals,
4052 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4053 if (!mayTailCallThisCC(CalleeCC))
4054 return false;
4055
4056 // If -tailcallopt is specified, make fastcc functions tail-callable.
4057 MachineFunction &MF = DAG.getMachineFunction();
4058 const Function &CallerF = MF.getFunction();
4059
4060 // If the function return type is x86_fp80 and the callee return type is not,
4061 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4062 // perform a tailcall optimization here.
4063 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4064 return false;
4065
4066 CallingConv::ID CallerCC = CallerF.getCallingConv();
4067 bool CCMatch = CallerCC == CalleeCC;
4068 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4069 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4070
4071 // Win64 functions have extra shadow space for argument homing. Don't do the
4072 // sibcall if the caller and callee have mismatched expectations for this
4073 // space.
4074 if (IsCalleeWin64 != IsCallerWin64)
4075 return false;
4076
4077 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4078 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4079 return true;
4080 return false;
4081 }
4082
4083 // Look for obvious safe cases to perform tail call optimization that do not
4084 // require ABI changes. This is what gcc calls sibcall.
4085
4086 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4087 // emit a special epilogue.
4088 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4089 if (RegInfo->needsStackRealignment(MF))
4090 return false;
4091
4092 // Also avoid sibcall optimization if either caller or callee uses struct
4093 // return semantics.
4094 if (isCalleeStructRet || isCallerStructRet)
4095 return false;
4096
4097 // Do not sibcall optimize vararg calls unless all arguments are passed via
4098 // registers.
4099 LLVMContext &C = *DAG.getContext();
4100 if (isVarArg && !Outs.empty()) {
4101 // Optimizing for varargs on Win64 is unlikely to be safe without
4102 // additional testing.
4103 if (IsCalleeWin64 || IsCallerWin64)
4104 return false;
4105
4106 SmallVector<CCValAssign, 16> ArgLocs;
4107 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4108
4109 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4110 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4111 if (!ArgLocs[i].isRegLoc())
4112 return false;
4113 }
4114
4115 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4116 // stack. Therefore, if it's not used by the call it is not safe to optimize
4117 // this into a sibcall.
4118 bool Unused = false;
4119 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4120 if (!Ins[i].Used) {
4121 Unused = true;
4122 break;
4123 }
4124 }
4125 if (Unused) {
4126 SmallVector<CCValAssign, 16> RVLocs;
4127 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4128 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4129 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4130 CCValAssign &VA = RVLocs[i];
4131 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4132 return false;
4133 }
4134 }
4135
4136 // Check that the call results are passed in the same way.
4137 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4138 RetCC_X86, RetCC_X86))
4139 return false;
4140 // The callee has to preserve all registers the caller needs to preserve.
4141 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4142 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4143 if (!CCMatch) {
4144 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4145 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4146 return false;
4147 }
4148
4149 unsigned StackArgsSize = 0;
4150
4151 // If the callee takes no arguments then go on to check the results of the
4152 // call.
4153 if (!Outs.empty()) {
4154 // Check if stack adjustment is needed. For now, do not do this if any
4155 // argument is passed on the stack.
4156 SmallVector<CCValAssign, 16> ArgLocs;
4157 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4158
4159 // Allocate shadow area for Win64
4160 if (IsCalleeWin64)
4161 CCInfo.AllocateStack(32, 8);
4162
4163 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4164 StackArgsSize = CCInfo.getNextStackOffset();
4165
4166 if (CCInfo.getNextStackOffset()) {
4167 // Check if the arguments are already laid out in the right way as
4168 // the caller's fixed stack objects.
4169 MachineFrameInfo &MFI = MF.getFrameInfo();
4170 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4171 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4172 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4173 CCValAssign &VA = ArgLocs[i];
4174 SDValue Arg = OutVals[i];
4175 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4176 if (VA.getLocInfo() == CCValAssign::Indirect)
4177 return false;
4178 if (!VA.isRegLoc()) {
4179 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4180 MFI, MRI, TII, VA))
4181 return false;
4182 }
4183 }
4184 }
4185
4186 bool PositionIndependent = isPositionIndependent();
4187 // If the tailcall address may be in a register, then make sure it's
4188 // possible to register allocate for it. In 32-bit, the call address can
4189 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4190 // callee-saved registers are restored. These happen to be the same
4191 // registers used to pass 'inreg' arguments so watch out for those.
4192 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4193 !isa<ExternalSymbolSDNode>(Callee)) ||
4194 PositionIndependent)) {
4195 unsigned NumInRegs = 0;
4196 // In PIC we need an extra register to formulate the address computation
4197 // for the callee.
4198 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4199
4200 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4201 CCValAssign &VA = ArgLocs[i];
4202 if (!VA.isRegLoc())
4203 continue;
4204 unsigned Reg = VA.getLocReg();
4205 switch (Reg) {
4206 default: break;
4207 case X86::EAX: case X86::EDX: case X86::ECX:
4208 if (++NumInRegs == MaxInRegs)
4209 return false;
4210 break;
4211 }
4212 }
4213 }
4214
4215 const MachineRegisterInfo &MRI = MF.getRegInfo();
4216 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4217 return false;
4218 }
4219
4220 bool CalleeWillPop =
4221 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4222 MF.getTarget().Options.GuaranteedTailCallOpt);
4223
4224 if (unsigned BytesToPop =
4225 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4226 // If we have bytes to pop, the callee must pop them.
4227 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4228 if (!CalleePopMatches)
4229 return false;
4230 } else if (CalleeWillPop && StackArgsSize > 0) {
4231 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4232 return false;
4233 }
4234
4235 return true;
4236}
4237
4238FastISel *
4239X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4240 const TargetLibraryInfo *libInfo) const {
4241 return X86::createFastISel(funcInfo, libInfo);
4242}
4243
4244//===----------------------------------------------------------------------===//
4245// Other Lowering Hooks
4246//===----------------------------------------------------------------------===//
4247
4248static bool MayFoldLoad(SDValue Op) {
4249 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4250}
4251
4252static bool MayFoldIntoStore(SDValue Op) {
4253 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4254}
4255
4256static bool MayFoldIntoZeroExtend(SDValue Op) {
4257 if (Op.hasOneUse()) {
4258 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4259 return (ISD::ZERO_EXTEND == Opcode);
4260 }
4261 return false;
4262}
4263
4264static bool isTargetShuffle(unsigned Opcode) {
4265 switch(Opcode) {
4266 default: return false;
4267 case X86ISD::BLENDI:
4268 case X86ISD::PSHUFB:
4269 case X86ISD::PSHUFD:
4270 case X86ISD::PSHUFHW:
4271 case X86ISD::PSHUFLW:
4272 case X86ISD::SHUFP:
4273 case X86ISD::INSERTPS:
4274 case X86ISD::EXTRQI:
4275 case X86ISD::INSERTQI:
4276 case X86ISD::PALIGNR:
4277 case X86ISD::VSHLDQ:
4278 case X86ISD::VSRLDQ:
4279 case X86ISD::MOVLHPS:
4280 case X86ISD::MOVHLPS:
4281 case X86ISD::MOVLPS:
4282 case X86ISD::MOVLPD:
4283 case X86ISD::MOVSHDUP:
4284 case X86ISD::MOVSLDUP:
4285 case X86ISD::MOVDDUP:
4286 case X86ISD::MOVSS:
4287 case X86ISD::MOVSD:
4288 case X86ISD::UNPCKL:
4289 case X86ISD::UNPCKH:
4290 case X86ISD::VBROADCAST:
4291 case X86ISD::VPERMILPI:
4292 case X86ISD::VPERMILPV:
4293 case X86ISD::VPERM2X128:
4294 case X86ISD::VPERMIL2:
4295 case X86ISD::VPERMI:
4296 case X86ISD::VPPERM:
4297 case X86ISD::VPERMV:
4298 case X86ISD::VPERMV3:
4299 case X86ISD::VPERMIV3:
4300 case X86ISD::VZEXT_MOVL:
4301 return true;
4302 }
4303}
4304
4305static bool isTargetShuffleVariableMask(unsigned Opcode) {
4306 switch (Opcode) {
4307 default: return false;
4308 // Target Shuffles.
4309 case X86ISD::PSHUFB:
4310 case X86ISD::VPERMILPV:
4311 case X86ISD::VPERMIL2:
4312 case X86ISD::VPPERM:
4313 case X86ISD::VPERMV:
4314 case X86ISD::VPERMV3:
4315 case X86ISD::VPERMIV3:
4316 return true;
4317 // 'Faux' Target Shuffles.
4318 case ISD::AND:
4319 case X86ISD::ANDNP:
4320 return true;
4321 }
4322}
4323
4324SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4325 MachineFunction &MF = DAG.getMachineFunction();
4326 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4327 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4328 int ReturnAddrIndex = FuncInfo->getRAIndex();
4329
4330 if (ReturnAddrIndex == 0) {
4331 // Set up a frame object for the return address.
4332 unsigned SlotSize = RegInfo->getSlotSize();
4333 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4334 -(int64_t)SlotSize,
4335 false);
4336 FuncInfo->setRAIndex(ReturnAddrIndex);
4337 }
4338
4339 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4340}
4341
4342bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4343 bool hasSymbolicDisplacement) {
4344 // Offset should fit into 32 bit immediate field.
4345 if (!isInt<32>(Offset))
4346 return false;
4347
4348 // If we don't have a symbolic displacement - we don't have any extra
4349 // restrictions.
4350 if (!hasSymbolicDisplacement)
4351 return true;
4352
4353 // FIXME: Some tweaks might be needed for medium code model.
4354 if (M != CodeModel::Small && M != CodeModel::Kernel)
4355 return false;
4356
4357 // For small code model we assume that latest object is 16MB before end of 31
4358 // bits boundary. We may also accept pretty large negative constants knowing
4359 // that all objects are in the positive half of address space.
4360 if (M == CodeModel::Small && Offset < 16*1024*1024)
4361 return true;
4362
4363 // For kernel code model we know that all object resist in the negative half
4364 // of 32bits address space. We may not accept negative offsets, since they may
4365 // be just off and we may accept pretty large positive ones.
4366 if (M == CodeModel::Kernel && Offset >= 0)
4367 return true;
4368
4369 return false;
4370}
4371
4372/// Determines whether the callee is required to pop its own arguments.
4373/// Callee pop is necessary to support tail calls.
4374bool X86::isCalleePop(CallingConv::ID CallingConv,
4375 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4376 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4377 // can guarantee TCO.
4378 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4379 return true;
4380
4381 switch (CallingConv) {
4382 default:
4383 return false;
4384 case CallingConv::X86_StdCall:
4385 case CallingConv::X86_FastCall:
4386 case CallingConv::X86_ThisCall:
4387 case CallingConv::X86_VectorCall:
4388 return !is64Bit;
4389 }
4390}
4391
4392/// \brief Return true if the condition is an unsigned comparison operation.
4393static bool isX86CCUnsigned(unsigned X86CC) {
4394 switch (X86CC) {
4395 default:
4396 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4396)
;
4397 case X86::COND_E:
4398 case X86::COND_NE:
4399 case X86::COND_B:
4400 case X86::COND_A:
4401 case X86::COND_BE:
4402 case X86::COND_AE:
4403 return true;
4404 case X86::COND_G:
4405 case X86::COND_GE:
4406 case X86::COND_L:
4407 case X86::COND_LE:
4408 return false;
4409 }
4410}
4411
4412static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4413 switch (SetCCOpcode) {
4414 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4414)
;
4415 case ISD::SETEQ: return X86::COND_E;
4416 case ISD::SETGT: return X86::COND_G;
4417 case ISD::SETGE: return X86::COND_GE;
4418 case ISD::SETLT: return X86::COND_L;
4419 case ISD::SETLE: return X86::COND_LE;
4420 case ISD::SETNE: return X86::COND_NE;
4421 case ISD::SETULT: return X86::COND_B;
4422 case ISD::SETUGT: return X86::COND_A;
4423 case ISD::SETULE: return X86::COND_BE;
4424 case ISD::SETUGE: return X86::COND_AE;
4425 }
4426}
4427
4428/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4429/// condition code, returning the condition code and the LHS/RHS of the
4430/// comparison to make.
4431static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4432 bool isFP, SDValue &LHS, SDValue &RHS,
4433 SelectionDAG &DAG) {
4434 if (!isFP) {
4435 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4436 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4437 // X > -1 -> X == 0, jump !sign.
4438 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4439 return X86::COND_NS;
4440 }
4441 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4442 // X < 0 -> X == 0, jump on sign.
4443 return X86::COND_S;
4444 }
4445 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4446 // X < 1 -> X <= 0
4447 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4448 return X86::COND_LE;
4449 }
4450 }
4451
4452 return TranslateIntegerX86CC(SetCCOpcode);
4453 }
4454
4455 // First determine if it is required or is profitable to flip the operands.
4456
4457 // If LHS is a foldable load, but RHS is not, flip the condition.
4458 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4459 !ISD::isNON_EXTLoad(RHS.getNode())) {
4460 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4461 std::swap(LHS, RHS);
4462 }
4463
4464 switch (SetCCOpcode) {
4465 default: break;
4466 case ISD::SETOLT:
4467 case ISD::SETOLE:
4468 case ISD::SETUGT:
4469 case ISD::SETUGE:
4470 std::swap(LHS, RHS);
4471 break;
4472 }
4473
4474 // On a floating point condition, the flags are set as follows:
4475 // ZF PF CF op
4476 // 0 | 0 | 0 | X > Y
4477 // 0 | 0 | 1 | X < Y
4478 // 1 | 0 | 0 | X == Y
4479 // 1 | 1 | 1 | unordered
4480 switch (SetCCOpcode) {
4481 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4481)
;
4482 case ISD::SETUEQ:
4483 case ISD::SETEQ: return X86::COND_E;
4484 case ISD::SETOLT: // flipped
4485 case ISD::SETOGT:
4486 case ISD::SETGT: return X86::COND_A;
4487 case ISD::SETOLE: // flipped
4488 case ISD::SETOGE:
4489 case ISD::SETGE: return X86::COND_AE;
4490 case ISD::SETUGT: // flipped
4491 case ISD::SETULT:
4492 case ISD::SETLT: return X86::COND_B;
4493 case ISD::SETUGE: // flipped
4494 case ISD::SETULE:
4495 case ISD::SETLE: return X86::COND_BE;
4496 case ISD::SETONE:
4497 case ISD::SETNE: return X86::COND_NE;
4498 case ISD::SETUO: return X86::COND_P;
4499 case ISD::SETO: return X86::COND_NP;
4500 case ISD::SETOEQ:
4501 case ISD::SETUNE: return X86::COND_INVALID;
4502 }
4503}
4504
4505/// Is there a floating point cmov for the specific X86 condition code?
4506/// Current x86 isa includes the following FP cmov instructions:
4507/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4508static bool hasFPCMov(unsigned X86CC) {
4509 switch (X86CC) {
4510 default:
4511 return false;
4512 case X86::COND_B:
4513 case X86::COND_BE:
4514 case X86::COND_E:
4515 case X86::COND_P:
4516 case X86::COND_A:
4517 case X86::COND_AE:
4518 case X86::COND_NE:
4519 case X86::COND_NP:
4520 return true;
4521 }
4522}
4523
4524
4525bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4526 const CallInst &I,
4527 MachineFunction &MF,
4528 unsigned Intrinsic) const {
4529
4530 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4531 if (!IntrData)
4532 return false;
4533
4534 Info.opc = ISD::INTRINSIC_W_CHAIN;
4535 Info.flags = MachineMemOperand::MONone;
4536 Info.offset = 0;
4537
4538 switch (IntrData->Type) {
4539 case EXPAND_FROM_MEM: {
4540 Info.ptrVal = I.getArgOperand(0);
4541 Info.memVT = MVT::getVT(I.getType());
4542 Info.align = 1;
4543 Info.flags |= MachineMemOperand::MOLoad;
4544 break;
4545 }
4546 case COMPRESS_TO_MEM: {
4547 Info.ptrVal = I.getArgOperand(0);
4548 Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
4549 Info.align = 1;
4550 Info.flags |= MachineMemOperand::MOStore;
4551 break;
4552 }
4553 case TRUNCATE_TO_MEM_VI8:
4554 case TRUNCATE_TO_MEM_VI16:
4555 case TRUNCATE_TO_MEM_VI32: {
4556 Info.ptrVal = I.getArgOperand(0);
4557 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4558 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4559 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4560 ScalarVT = MVT::i8;
4561 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4562 ScalarVT = MVT::i16;
4563 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4564 ScalarVT = MVT::i32;
4565
4566 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4567 Info.align = 1;
4568 Info.flags |= MachineMemOperand::MOStore;
4569 break;
4570 }
4571 default:
4572 return false;
4573 }
4574
4575 return true;
4576}
4577
4578/// Returns true if the target can instruction select the
4579/// specified FP immediate natively. If false, the legalizer will
4580/// materialize the FP immediate as a load from a constant pool.
4581bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4582 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4583 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4584 return true;
4585 }
4586 return false;
4587}
4588
4589bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4590 ISD::LoadExtType ExtTy,
4591 EVT NewVT) const {
4592 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4593 // relocation target a movq or addq instruction: don't let the load shrink.
4594 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4595 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4596 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4597 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4598 return true;
4599}
4600
4601/// \brief Returns true if it is beneficial to convert a load of a constant
4602/// to just the constant itself.
4603bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4604 Type *Ty) const {
4605 assert(Ty->isIntegerTy())(static_cast <bool> (Ty->isIntegerTy()) ? void (0) :
__assert_fail ("Ty->isIntegerTy()", "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4605, __extension__ __PRETTY_FUNCTION__))
;
4606
4607 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4608 if (BitSize == 0 || BitSize > 64)
4609 return false;
4610 return true;
4611}
4612
4613bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
4614 // TODO: It might be a win to ease or lift this restriction, but the generic
4615 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
4616 if (VT.isVector() && Subtarget.hasAVX512())
4617 return false;
4618
4619 return true;
4620}
4621
4622bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
4623 unsigned Index) const {
4624 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4625 return false;
4626
4627 // Mask vectors support all subregister combinations and operations that
4628 // extract half of vector.
4629 if (ResVT.getVectorElementType() == MVT::i1)
4630 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
4631 (Index == ResVT.getVectorNumElements()));
4632
4633 return (Index % ResVT.getVectorNumElements()) == 0;
4634}
4635
4636bool X86TargetLowering::isCheapToSpeculateCttz() const {
4637 // Speculate cttz only if we can directly use TZCNT.
4638 return Subtarget.hasBMI();
4639}
4640
4641bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4642 // Speculate ctlz only if we can directly use LZCNT.
4643 return Subtarget.hasLZCNT();
4644}
4645
4646bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
4647 EVT BitcastVT) const {
4648 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1)
4649 return false;
4650
4651 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
4652}
4653
4654bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
4655 const SelectionDAG &DAG) const {
4656 // Do not merge to float value size (128 bytes) if no implicit
4657 // float attribute is set.
4658 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
4659 Attribute::NoImplicitFloat);
4660
4661 if (NoFloat) {
4662 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
4663 return (MemVT.getSizeInBits() <= MaxIntSize);
4664 }
4665 return true;
4666}
4667
4668bool X86TargetLowering::isCtlzFast() const {
4669 return Subtarget.hasFastLZCNT();
4670}
4671
4672bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4673 const Instruction &AndI) const {
4674 return true;
4675}
4676
4677bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4678 if (!Subtarget.hasBMI())
4679 return false;
4680
4681 // There are only 32-bit and 64-bit forms for 'andn'.
4682 EVT VT = Y.getValueType();
4683 if (VT != MVT::i32 && VT != MVT::i64)
4684 return false;
4685
4686 return true;
4687}
4688
4689MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4690 MVT VT = MVT::getIntegerVT(NumBits);
4691 if (isTypeLegal(VT))
4692 return VT;
4693
4694 // PMOVMSKB can handle this.
4695 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4696 return MVT::v16i8;
4697
4698 // VPMOVMSKB can handle this.
4699 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4700 return MVT::v32i8;
4701
4702 // TODO: Allow 64-bit type for 32-bit target.
4703 // TODO: 512-bit types should be allowed, but make sure that those
4704 // cases are handled in combineVectorSizedSetCCEquality().
4705
4706 return MVT::INVALID_SIMPLE_VALUE_TYPE;
4707}
4708
4709/// Val is the undef sentinel value or equal to the specified value.
4710static bool isUndefOrEqual(int Val, int CmpVal) {
4711 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4712}
4713
4714/// Val is either the undef or zero sentinel value.
4715static bool isUndefOrZero(int Val) {
4716 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4717}
4718
4719/// Return true if every element in Mask, beginning
4720/// from position Pos and ending in Pos+Size is the undef sentinel value.
4721static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4722 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4723 if (Mask[i] != SM_SentinelUndef)
4724 return false;
4725 return true;
4726}
4727
4728/// Return true if Val is undef or if its value falls within the
4729/// specified range (L, H].
4730static bool isUndefOrInRange(int Val, int Low, int Hi) {
4731 return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
4732}
4733
4734/// Return true if every element in Mask is undef or if its value
4735/// falls within the specified range (L, H].
4736static bool isUndefOrInRange(ArrayRef<int> Mask,
4737 int Low, int Hi) {
4738 for (int M : Mask)
4739 if (!isUndefOrInRange(M, Low, Hi))
4740 return false;
4741 return true;
4742}
4743
4744/// Return true if Val is undef, zero or if its value falls within the
4745/// specified range (L, H].
4746static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4747 return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
4748}
4749
4750/// Return true if every element in Mask is undef, zero or if its value
4751/// falls within the specified range (L, H].
4752static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4753 for (int M : Mask)
4754 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4755 return false;
4756 return true;
4757}
4758
4759/// Return true if every element in Mask, beginning
4760/// from position Pos and ending in Pos+Size, falls within the specified
4761/// sequential range (Low, Low+Size]. or is undef.
4762static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
4763 unsigned Pos, unsigned Size, int Low) {
4764 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
4765 if (!isUndefOrEqual(Mask[i], Low))
4766 return false;
4767 return true;
4768}
4769
4770/// Return true if every element in Mask, beginning
4771/// from position Pos and ending in Pos+Size, falls within the specified
4772/// sequential range (Low, Low+Size], or is undef or is zero.
4773static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4774 unsigned Size, int Low) {
4775 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4776 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4777 return false;
4778 return true;
4779}
4780
4781/// Return true if every element in Mask, beginning
4782/// from position Pos and ending in Pos+Size is undef or is zero.
4783static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4784 unsigned Size) {
4785 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4786 if (!isUndefOrZero(Mask[i]))
4787 return false;
4788 return true;
4789}
4790
4791/// \brief Helper function to test whether a shuffle mask could be
4792/// simplified by widening the elements being shuffled.
4793///
4794/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
4795/// leaves it in an unspecified state.
4796///
4797/// NOTE: This must handle normal vector shuffle masks and *target* vector
4798/// shuffle masks. The latter have the special property of a '-2' representing
4799/// a zero-ed lane of a vector.
4800static bool canWidenShuffleElements(ArrayRef<int> Mask,
4801 SmallVectorImpl<int> &WidenedMask) {
4802 WidenedMask.assign(Mask.size() / 2, 0);
4803 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4804 int M0 = Mask[i];
4805 int M1 = Mask[i + 1];
4806
4807 // If both elements are undef, its trivial.
4808 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
4809 WidenedMask[i / 2] = SM_SentinelUndef;
4810 continue;
4811 }
4812
4813 // Check for an undef mask and a mask value properly aligned to fit with
4814 // a pair of values. If we find such a case, use the non-undef mask's value.
4815 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
4816 WidenedMask[i / 2] = M1 / 2;
4817 continue;
4818 }
4819 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
4820 WidenedMask[i / 2] = M0 / 2;
4821 continue;
4822 }
4823
4824 // When zeroing, we need to spread the zeroing across both lanes to widen.
4825 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
4826 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
4827 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
4828 WidenedMask[i / 2] = SM_SentinelZero;
4829 continue;
4830 }
4831 return false;
4832 }
4833
4834 // Finally check if the two mask values are adjacent and aligned with
4835 // a pair.
4836 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
4837 WidenedMask[i / 2] = M0 / 2;
4838 continue;
4839 }
4840
4841 // Otherwise we can't safely widen the elements used in this shuffle.
4842 return false;
4843 }
4844 assert(WidenedMask.size() == Mask.size() / 2 &&(static_cast <bool> (WidenedMask.size() == Mask.size() /
2 && "Incorrect size of mask after widening the elements!"
) ? void (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4845, __extension__ __PRETTY_FUNCTION__))
4845 "Incorrect size of mask after widening the elements!")(static_cast <bool> (WidenedMask.size() == Mask.size() /
2 && "Incorrect size of mask after widening the elements!"
) ? void (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4845, __extension__ __PRETTY_FUNCTION__))
;
4846
4847 return true;
4848}
4849
4850/// Returns true if Elt is a constant zero or a floating point constant +0.0.
4851bool X86::isZeroNode(SDValue Elt) {
4852 return isNullConstant(Elt) || isNullFPConstant(Elt);
4853}
4854
4855// Build a vector of constants.
4856// Use an UNDEF node if MaskElt == -1.
4857// Split 64-bit constants in the 32-bit mode.
4858static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
4859 const SDLoc &dl, bool IsMask = false) {
4860
4861 SmallVector<SDValue, 32> Ops;
4862 bool Split = false;
4863
4864 MVT ConstVecVT = VT;
4865 unsigned NumElts = VT.getVectorNumElements();
4866 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4867 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4868 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4869 Split = true;
4870 }
4871
4872 MVT EltVT = ConstVecVT.getVectorElementType();
4873 for (unsigned i = 0; i < NumElts; ++i) {
4874 bool IsUndef = Values[i] < 0 && IsMask;
4875 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
4876 DAG.getConstant(Values[i], dl, EltVT);
4877 Ops.push_back(OpNode);
4878 if (Split)
4879 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
4880 DAG.getConstant(0, dl, EltVT));
4881 }
4882 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4883 if (Split)
4884 ConstsNode = DAG.getBitcast(VT, ConstsNode);
4885 return ConstsNode;
4886}
4887
4888static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
4889 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4890 assert(Bits.size() == Undefs.getBitWidth() &&(static_cast <bool> (Bits.size() == Undefs.getBitWidth(
) && "Unequal constant and undef arrays") ? void (0) :
__assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4891, __extension__ __PRETTY_FUNCTION__))
4891 "Unequal constant and undef arrays")(static_cast <bool> (Bits.size() == Undefs.getBitWidth(
) && "Unequal constant and undef arrays") ? void (0) :
__assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4891, __extension__ __PRETTY_FUNCTION__))
;
4892 SmallVector<SDValue, 32> Ops;
4893 bool Split = false;
4894
4895 MVT ConstVecVT = VT;
4896 unsigned NumElts = VT.getVectorNumElements();
4897 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4898 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4899 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4900 Split = true;
4901 }
4902
4903 MVT EltVT = ConstVecVT.getVectorElementType();
4904 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4905 if (Undefs[i]) {
4906 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
4907 continue;
4908 }
4909 const APInt &V = Bits[i];
4910 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")(static_cast <bool> (V.getBitWidth() == VT.getScalarSizeInBits
() && "Unexpected sizes") ? void (0) : __assert_fail (
"V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4910, __extension__ __PRETTY_FUNCTION__))
;
4911 if (Split) {
4912 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
4913 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
4914 } else if (EltVT == MVT::f32) {
4915 APFloat FV(APFloat::IEEEsingle(), V);
4916 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4917 } else if (EltVT == MVT::f64) {
4918 APFloat FV(APFloat::IEEEdouble(), V);
4919 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4920 } else {
4921 Ops.push_back(DAG.getConstant(V, dl, EltVT));
4922 }
4923 }
4924
4925 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4926 return DAG.getBitcast(VT, ConstsNode);
4927}
4928
4929/// Returns a vector of specified type with all zero elements.
4930static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
4931 SelectionDAG &DAG, const SDLoc &dl) {
4932 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4934, __extension__ __PRETTY_FUNCTION__))
4933 VT.getVectorElementType() == MVT::i1) &&(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4934, __extension__ __PRETTY_FUNCTION__))
4934 "Unexpected vector type")(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector() || VT.getVectorElementType() == MVT
::i1) && "Unexpected vector type") ? void (0) : __assert_fail
("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4934, __extension__ __PRETTY_FUNCTION__))
;
4935
4936 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
4937 // type. This ensures they get CSE'd. But if the integer type is not
4938 // available, use a floating-point +0.0 instead.
4939 SDValue Vec;
4940 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
4941 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
4942 } else if (VT.getVectorElementType() == MVT::i1) {
4943 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(static_cast <bool> ((Subtarget.hasBWI() || VT.getVectorNumElements
() <= 16) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4944, __extension__ __PRETTY_FUNCTION__))
4944 "Unexpected vector type")(static_cast <bool> ((Subtarget.hasBWI() || VT.getVectorNumElements
() <= 16) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4944, __extension__ __PRETTY_FUNCTION__))
;
4945 assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&(static_cast <bool> ((Subtarget.hasVLX() || VT.getVectorNumElements
() >= 8) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4946, __extension__ __PRETTY_FUNCTION__))
4946 "Unexpected vector type")(static_cast <bool> ((Subtarget.hasVLX() || VT.getVectorNumElements
() >= 8) && "Unexpected vector type") ? void (0) :
__assert_fail ("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4946, __extension__ __PRETTY_FUNCTION__))
;
4947 Vec = DAG.getConstant(0, dl, VT);
4948 } else {
4949 unsigned Num32BitElts = VT.getSizeInBits() / 32;
4950 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
4951 }
4952 return DAG.getBitcast(VT, Vec);
4953}
4954
4955static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
4956 const SDLoc &dl, unsigned vectorWidth) {
4957 EVT VT = Vec.getValueType();
4958 EVT ElVT = VT.getVectorElementType();
4959 unsigned Factor = VT.getSizeInBits()/vectorWidth;
4960 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
4961 VT.getVectorNumElements()/Factor);
4962
4963 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
4964 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
4965 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")(static_cast <bool> (isPowerOf2_32(ElemsPerChunk) &&
"Elements per chunk not power of 2") ? void (0) : __assert_fail
("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4965, __extension__ __PRETTY_FUNCTION__))
;
4966
4967 // This is the index of the first element of the vectorWidth-bit chunk
4968 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
4969 IdxVal &= ~(ElemsPerChunk - 1);
4970
4971 // If the input is a buildvector just emit a smaller one.
4972 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
4973 return DAG.getBuildVector(ResultVT, dl,
4974 Vec->ops().slice(IdxVal, ElemsPerChunk));
4975
4976 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
4977 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
4978}
4979
4980/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
4981/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
4982/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
4983/// instructions or a simple subregister reference. Idx is an index in the
4984/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
4985/// lowering EXTRACT_VECTOR_ELT operations easier.
4986static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
4987 SelectionDAG &DAG, const SDLoc &dl) {
4988 assert((Vec.getValueType().is256BitVector() ||(static_cast <bool> ((Vec.getValueType().is256BitVector
() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"
) ? void (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4989, __extension__ __PRETTY_FUNCTION__))
4989 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(static_cast <bool> ((Vec.getValueType().is256BitVector
() || Vec.getValueType().is512BitVector()) && "Unexpected vector size!"
) ? void (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4989, __extension__ __PRETTY_FUNCTION__))
;
4990 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
4991}
4992
4993/// Generate a DAG to grab 256-bits from a 512-bit vector.
4994static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
4995 SelectionDAG &DAG, const SDLoc &dl) {
4996 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is512BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 4996, __extension__ __PRETTY_FUNCTION__))
;
4997 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
4998}
4999
5000static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5001 SelectionDAG &DAG, const SDLoc &dl,
5002 unsigned vectorWidth) {
5003 assert((vectorWidth == 128 || vectorWidth == 256) &&(static_cast <bool> ((vectorWidth == 128 || vectorWidth
== 256) && "Unsupported vector width") ? void (0) : __assert_fail
("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5004, __extension__ __PRETTY_FUNCTION__))
5004 "Unsupported vector width")(static_cast <bool> ((vectorWidth == 128 || vectorWidth
== 256) && "Unsupported vector width") ? void (0) : __assert_fail
("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5004, __extension__ __PRETTY_FUNCTION__))
;
5005 // Inserting UNDEF is Result
5006 if (Vec.isUndef())
5007 return Result;
5008 EVT VT = Vec.getValueType();
5009 EVT ElVT = VT.getVectorElementType();
5010 EVT ResultVT = Result.getValueType();
5011
5012 // Insert the relevant vectorWidth bits.
5013 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5014 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")(static_cast <bool> (isPowerOf2_32(ElemsPerChunk) &&
"Elements per chunk not power of 2") ? void (0) : __assert_fail
("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5014, __extension__ __PRETTY_FUNCTION__))
;
5015
5016 // This is the index of the first element of the vectorWidth-bit chunk
5017 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5018 IdxVal &= ~(ElemsPerChunk - 1);
5019
5020 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5021 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5022}
5023
5024/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5025/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5026/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5027/// simple superregister reference. Idx is an index in the 128 bits
5028/// we want. It need not be aligned to a 128-bit boundary. That makes
5029/// lowering INSERT_VECTOR_ELT operations easier.
5030static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5031 SelectionDAG &DAG, const SDLoc &dl) {
5032 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is128BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5032, __extension__ __PRETTY_FUNCTION__))
;
5033 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5034}
5035
5036static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5037 SelectionDAG &DAG, const SDLoc &dl) {
5038 assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!")(static_cast <bool> (Vec.getValueType().is256BitVector(
) && "Unexpected vector size!") ? void (0) : __assert_fail
("Vec.getValueType().is256BitVector() && \"Unexpected vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5038, __extension__ __PRETTY_FUNCTION__))
;
5039 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
5040}
5041
5042// Return true if the instruction zeroes the unused upper part of the
5043// destination and accepts mask.
5044static bool isMaskedZeroUpperBitsvXi1(unsigned int Opcode) {
5045 switch (Opcode) {
5046 default:
5047 return false;
5048 case X86ISD::TESTM:
5049 case X86ISD::TESTNM:
5050 case X86ISD::PCMPEQM:
5051 case X86ISD::PCMPGTM:
5052 case X86ISD::CMPM:
5053 case X86ISD::CMPMU:
5054 case X86ISD::CMPM_RND:
5055 return true;
5056 }
5057}
5058
5059/// Insert i1-subvector to i1-vector.
5060static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5061 const X86Subtarget &Subtarget) {
5062
5063 SDLoc dl(Op);
5064 SDValue Vec = Op.getOperand(0);
5065 SDValue SubVec = Op.getOperand(1);
5066 SDValue Idx = Op.getOperand(2);
5067
5068 if (!isa<ConstantSDNode>(Idx))
5069 return SDValue();
5070
5071 // Inserting undef is a nop. We can just return the original vector.
5072 if (SubVec.isUndef())
5073 return Vec;
5074
5075 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5076 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5077 return Op;
5078
5079 MVT OpVT = Op.getSimpleValueType();
5080 unsigned NumElems = OpVT.getVectorNumElements();
5081
5082 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5083
5084 // Extend to natively supported kshift.
5085 MVT WideOpVT = OpVT;
5086 if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
5087 WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5088
5089 // Inserting into the lsbs of a zero vector is legal. ISel will insert shifts
5090 // if necessary.
5091 if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {
5092 // May need to promote to a legal type.
5093 Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5094 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5095 SubVec, Idx);
5096 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5097 }
5098
5099 MVT SubVecVT = SubVec.getSimpleValueType();
5100 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5101
5102 assert(IdxVal + SubVecNumElems <= NumElems &&(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5104, __extension__ __PRETTY_FUNCTION__))
5103 IdxVal % SubVecVT.getSizeInBits() == 0 &&(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5104, __extension__ __PRETTY_FUNCTION__))
5104 "Unexpected index value in INSERT_SUBVECTOR")(static_cast <bool> (IdxVal + SubVecNumElems <= NumElems
&& IdxVal % SubVecVT.getSizeInBits() == 0 &&
"Unexpected index value in INSERT_SUBVECTOR") ? void (0) : __assert_fail
("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5104, __extension__ __PRETTY_FUNCTION__))
;
5105
5106 SDValue Undef = DAG.getUNDEF(WideOpVT);
5107
5108 if (IdxVal == 0) {
5109 // Zero lower bits of the Vec
5110 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5111 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
5112 ZeroIdx);
5113 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5114 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5115 // Merge them together, SubVec should be zero extended.
5116 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5117 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5118 SubVec, ZeroIdx);
5119 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5120 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5121 }
5122
5123 SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5124 Undef, SubVec, ZeroIdx);
5125
5126 if (Vec.isUndef()) {
5127 assert(IdxVal != 0 && "Unexpected index")(static_cast <bool> (IdxVal != 0 && "Unexpected index"
) ? void (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5127, __extension__ __PRETTY_FUNCTION__))
;
5128 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5129 DAG.getConstant(IdxVal, dl, MVT::i8));
5130 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5131 }
5132
5133 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5134 assert(IdxVal != 0 && "Unexpected index")(static_cast <bool> (IdxVal != 0 && "Unexpected index"
) ? void (0) : __assert_fail ("IdxVal != 0 && \"Unexpected index\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5134, __extension__ __PRETTY_FUNCTION__))
;
5135 NumElems = WideOpVT.getVectorNumElements();
5136 unsigned ShiftLeft = NumElems - SubVecNumElems;
5137 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5138 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5139 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5140 if (ShiftRight != 0)
5141 SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
5142 DAG.getConstant(ShiftRight, dl, MVT::i8));
5143 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
5144 }
5145
5146 // Simple case when we put subvector in the upper part
5147 if (IdxVal + SubVecNumElems == NumElems) {
5148 SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
5149 DAG.getConstant(IdxVal, dl, MVT::i8));
5150 if (SubVecNumElems * 2 == NumElems) {
5151 // Special case, use legal zero extending insert_subvector. This allows
5152 // isel to opimitize when bits are known zero.
5153 Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
5154 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5155 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5156 Vec, ZeroIdx);
5157 } else {
5158 // Otherwise use explicit shifts to zero the bits.
5159 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5160 Undef, Vec, ZeroIdx);
5161 NumElems = WideOpVT.getVectorNumElements();
5162 SDValue ShiftBits = DAG.getConstant(NumElems - IdxVal, dl, MVT::i8);
5163 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5164 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5165 }
5166 Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
5167 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5168 }
5169
5170 // Inserting into the middle is more complicated.
5171
5172 NumElems = WideOpVT.getVectorNumElements();
5173
5174 // Widen the vector if needed.
5175 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5176 // Move the current value of the bit to be replace to the lsbs.
5177 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5178 DAG.getConstant(IdxVal, dl, MVT::i8));
5179 // Xor with the new bit.
5180 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Op, SubVec);
5181 // Shift to MSB, filling bottom bits with 0.
5182 unsigned ShiftLeft = NumElems - SubVecNumElems;
5183 Op = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Op,
5184 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5185 // Shift to the final position, filling upper bits with 0.
5186 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5187 Op = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Op,
5188 DAG.getConstant(ShiftRight, dl, MVT::i8));
5189 // Xor with original vector leaving the new value.
5190 Op = DAG.getNode(ISD::XOR, dl, WideOpVT, Vec, Op);
5191 // Reduce to original width if needed.
5192 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
5193}
5194
5195/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
5196/// instructions. This is used because creating CONCAT_VECTOR nodes of
5197/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
5198/// large BUILD_VECTORS.
5199static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
5200 unsigned NumElems, SelectionDAG &DAG,
5201 const SDLoc &dl) {
5202 SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5203 return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
5204}
5205
5206static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
5207 unsigned NumElems, SelectionDAG &DAG,
5208 const SDLoc &dl) {
5209 SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5210 return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
5211}
5212
5213/// Returns a vector of specified type with all bits set.
5214/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5215/// Then bitcast to their original type, ensuring they get CSE'd.
5216static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5217 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5218, __extension__ __PRETTY_FUNCTION__))
5218 "Expected a 128/256/512-bit vector type")(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Expected a 128/256/512-bit vector type"
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5218, __extension__ __PRETTY_FUNCTION__))
;
5219
5220 APInt Ones = APInt::getAllOnesValue(32);
5221 unsigned NumElts = VT.getSizeInBits() / 32;
5222 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5223 return DAG.getBitcast(VT, Vec);
5224}
5225
5226static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5227 SelectionDAG &DAG) {
5228 EVT InVT = In.getValueType();
5229 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode")(static_cast <bool> ((X86ISD::VSEXT == Opc || X86ISD::VZEXT
== Opc) && "Unexpected opcode") ? void (0) : __assert_fail
("(X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && \"Unexpected opcode\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5229, __extension__ __PRETTY_FUNCTION__))
;
5230
5231 if (VT.is128BitVector() && InVT.is128BitVector())
5232 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5233 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5234
5235 // For 256-bit vectors, we only need the lower (128-bit) input half.
5236 // For 512-bit vectors, we only need the lower input half or quarter.
5237 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
5238 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5239 In = extractSubVector(In, 0, DAG, DL,
5240 std::max(128, (int)VT.getSizeInBits() / Scale));
5241 }
5242
5243 return DAG.getNode(Opc, DL, VT, In);
5244}
5245
5246/// Returns a vector_shuffle node for an unpackl operation.
5247static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5248 SDValue V1, SDValue V2) {
5249 SmallVector<int, 8> Mask;
5250 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5251 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5252}
5253
5254/// Returns a vector_shuffle node for an unpackh operation.
5255static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5256 SDValue V1, SDValue V2) {
5257 SmallVector<int, 8> Mask;
5258 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5259 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5260}
5261
5262/// Return a vector_shuffle of the specified vector of zero or undef vector.
5263/// This produces a shuffle where the low element of V2 is swizzled into the
5264/// zero/undef vector, landing at element Idx.
5265/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5266static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5267 bool IsZero,
5268 const X86Subtarget &Subtarget,
5269 SelectionDAG &DAG) {
5270 MVT VT = V2.getSimpleValueType();
5271 SDValue V1 = IsZero
5272 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5273 int NumElems = VT.getVectorNumElements();
5274 SmallVector<int, 16> MaskVec(NumElems);
5275 for (int i = 0; i != NumElems; ++i)
5276 // If this is the insertion idx, put the low elt of V2 here.
5277 MaskVec[i] = (i == Idx) ? NumElems : i;
5278 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5279}
5280
5281static SDValue peekThroughBitcasts(SDValue V) {
5282 while (V.getNode() && V.getOpcode() == ISD::BITCAST)
5283 V = V.getOperand(0);
5284 return V;
5285}
5286
5287static SDValue peekThroughOneUseBitcasts(SDValue V) {
5288 while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
5289 V.getOperand(0).hasOneUse())
5290 V = V.getOperand(0);
5291 return V;
5292}
5293
5294static const Constant *getTargetConstantFromNode(SDValue Op) {
5295 Op = peekThroughBitcasts(Op);
5296
5297 auto *Load = dyn_cast<LoadSDNode>(Op);
5298 if (!Load)
5299 return nullptr;
5300
5301 SDValue Ptr = Load->getBasePtr();
5302 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5303 Ptr->getOpcode() == X86ISD::WrapperRIP)
5304 Ptr = Ptr->getOperand(0);
5305
5306 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5307 if (!CNode || CNode->isMachineConstantPoolEntry())
5308 return nullptr;
5309
5310 return dyn_cast<Constant>(CNode->getConstVal());
5311}
5312
5313// Extract raw constant bits from constant pools.
5314static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5315 APInt &UndefElts,
5316 SmallVectorImpl<APInt> &EltBits,
5317 bool AllowWholeUndefs = true,
5318 bool AllowPartialUndefs = true) {
5319 assert(EltBits.empty() && "Expected an empty EltBits vector")(static_cast <bool> (EltBits.empty() && "Expected an empty EltBits vector"
) ? void (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5319, __extension__ __PRETTY_FUNCTION__))
;
5320
5321 Op = peekThroughBitcasts(Op);
5322
5323 EVT VT = Op.getValueType();
5324 unsigned SizeInBits = VT.getSizeInBits();
5325 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(static_cast <bool> ((SizeInBits % EltSizeInBits) == 0 &&
"Can't split constant!") ? void (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5325, __extension__ __PRETTY_FUNCTION__))
;
5326 unsigned NumElts = SizeInBits / EltSizeInBits;
5327
5328 // Bitcast a source array of element bits to the target size.
5329 auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
5330 unsigned NumSrcElts = UndefSrcElts.getBitWidth();
5331 unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
5332 assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&(static_cast <bool> ((NumSrcElts * SrcEltSizeInBits) ==
SizeInBits && "Constant bit sizes don't match") ? void
(0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5333, __extension__ __PRETTY_FUNCTION__))
5333 "Constant bit sizes don't match")(static_cast <bool> ((NumSrcElts * SrcEltSizeInBits) ==
SizeInBits && "Constant bit sizes don't match") ? void
(0) : __assert_fail ("(NumSrcElts * SrcEltSizeInBits) == SizeInBits && \"Constant bit sizes don't match\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5333, __extension__ __PRETTY_FUNCTION__))
;
5334
5335 // Don't split if we don't allow undef bits.
5336 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5337 if (UndefSrcElts.getBoolValue() && !AllowUndefs)
5338 return false;
5339
5340 // If we're already the right size, don't bother bitcasting.
5341 if (NumSrcElts == NumElts) {
5342 UndefElts = UndefSrcElts;
5343 EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
5344 return true;
5345 }
5346
5347 // Extract all the undef/constant element data and pack into single bitsets.
5348 APInt UndefBits(SizeInBits, 0);
5349 APInt MaskBits(SizeInBits, 0);
5350
5351 for (unsigned i = 0; i != NumSrcElts; ++i) {
5352 unsigned BitOffset = i * SrcEltSizeInBits;
5353 if (UndefSrcElts[i])
5354 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5355 MaskBits.insertBits(SrcEltBits[i], BitOffset);
5356 }
5357
5358 // Split the undef/constant single bitset data into the target elements.
5359 UndefElts = APInt(NumElts, 0);
5360 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5361
5362 for (unsigned i = 0; i != NumElts; ++i) {
5363 unsigned BitOffset = i * EltSizeInBits;
5364 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5365
5366 // Only treat an element as UNDEF if all bits are UNDEF.
5367 if (UndefEltBits.isAllOnesValue()) {
5368 if (!AllowWholeUndefs)
5369 return false;
5370 UndefElts.setBit(i);
5371 continue;
5372 }
5373
5374 // If only some bits are UNDEF then treat them as zero (or bail if not
5375 // supported).
5376 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5377 return false;
5378
5379 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5380 EltBits[i] = Bits.getZExtValue();
5381 }
5382 return true;
5383 };
5384
5385 // Collect constant bits and insert into mask/undef bit masks.
5386 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5387 unsigned UndefBitIndex) {
5388 if (!Cst)
5389 return false;
5390 if (isa<UndefValue>(Cst)) {
5391 Undefs.setBit(UndefBitIndex);
5392 return true;
5393 }
5394 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5395 Mask = CInt->getValue();
5396 return true;
5397 }
5398 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5399 Mask = CFP->getValueAPF().bitcastToAPInt();
5400 return true;
5401 }
5402 return false;
5403 };
5404
5405 // Handle UNDEFs.
5406 if (Op.isUndef()) {
5407 APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
5408 SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
5409 return CastBitData(UndefSrcElts, SrcEltBits);
5410 }
5411
5412 // Extract scalar constant bits.
5413 if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
5414 APInt UndefSrcElts = APInt::getNullValue(1);
5415 SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
5416 return CastBitData(UndefSrcElts, SrcEltBits);
5417 }
5418
5419 // Extract constant bits from build vector.
5420 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5421 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5422 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5423
5424 APInt UndefSrcElts(NumSrcElts, 0);
5425 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5426 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5427 const SDValue &Src = Op.getOperand(i);
5428 if (Src.isUndef()) {
5429 UndefSrcElts.setBit(i);
5430 continue;
5431 }
5432 auto *Cst = cast<ConstantSDNode>(Src);
5433 SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5434 }
5435 return CastBitData(UndefSrcElts, SrcEltBits);
5436 }
5437
5438 // Extract constant bits from constant pool vector.
5439 if (auto *Cst = getTargetConstantFromNode(Op)) {
5440 Type *CstTy = Cst->getType();
5441 if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
5442 return false;
5443
5444 unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
5445 unsigned NumSrcElts = CstTy->getVectorNumElements();
5446
5447 APInt UndefSrcElts(NumSrcElts, 0);
5448 SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
5449 for (unsigned i = 0; i != NumSrcElts; ++i)
5450 if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
5451 UndefSrcElts, i))
5452 return false;
5453
5454 return CastBitData(UndefSrcElts, SrcEltBits);
5455 }
5456
5457 // Extract constant bits from a broadcasted constant pool scalar.
5458 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5459 EltSizeInBits <= VT.getScalarSizeInBits()) {
5460 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5461 unsigned SrcEltSizeInBits = Broadcast->getType()->getScalarSizeInBits();
5462 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5463
5464 APInt UndefSrcElts(NumSrcElts, 0);
5465 SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
5466 if (CollectConstantBits(Broadcast, SrcEltBits[0], UndefSrcElts, 0)) {
5467 if (UndefSrcElts[0])
5468 UndefSrcElts.setBits(0, NumSrcElts);
5469 SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
5470 return CastBitData(UndefSrcElts, SrcEltBits);
5471 }
5472 }
5473 }
5474
5475 // Extract a rematerialized scalar constant insertion.
5476 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5477 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5478 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5479 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5480 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5481
5482 APInt UndefSrcElts(NumSrcElts, 0);
5483 SmallVector<APInt, 64> SrcEltBits;
5484 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5485 SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
5486 SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
5487 return CastBitData(UndefSrcElts, SrcEltBits);
5488 }
5489
5490 return false;
5491}
5492
5493static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5494 unsigned MaskEltSizeInBits,
5495 SmallVectorImpl<uint64_t> &RawMask) {
5496 APInt UndefElts;
5497 SmallVector<APInt, 64> EltBits;
5498
5499 // Extract the raw target constant bits.
5500 // FIXME: We currently don't support UNDEF bits or mask entries.
5501 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5502 EltBits, /* AllowWholeUndefs */ false,
5503 /* AllowPartialUndefs */ false))
5504 return false;
5505
5506 // Insert the extracted elements into the mask.
5507 for (APInt Elt : EltBits)
5508 RawMask.push_back(Elt.getZExtValue());
5509
5510 return true;
5511}
5512
5513/// Create a shuffle mask that matches the PACKSS/PACKUS truncation.
5514/// Note: This ignores saturation, so inputs must be checked first.
5515static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask,
5516 bool Unary) {
5517 assert(Mask.empty() && "Expected an empty shuffle mask vector")(static_cast <bool> (Mask.empty() && "Expected an empty shuffle mask vector"
) ? void (0) : __assert_fail ("Mask.empty() && \"Expected an empty shuffle mask vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5517, __extension__ __PRETTY_FUNCTION__))
;
5518 unsigned NumElts = VT.getVectorNumElements();
5519 unsigned NumLanes = VT.getSizeInBits() / 128;
5520 unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits();
5521 unsigned Offset = Unary ? 0 : NumElts;
5522
5523 for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
5524 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5525 Mask.push_back(Elt + (Lane * NumEltsPerLane));
5526 for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += 2)
5527 Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset);
5528 }
5529}
5530
5531/// Calculates the shuffle mask corresponding to the target-specific opcode.
5532/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5533/// operands in \p Ops, and returns true.
5534/// Sets \p IsUnary to true if only one source is used. Note that this will set
5535/// IsUnary for shuffles which use a single input multiple times, and in those
5536/// cases it will adjust the mask to only have indices within that single input.
5537/// It is an error to call this with non-empty Mask/Ops vectors.
5538static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5539 SmallVectorImpl<SDValue> &Ops,
5540 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5541 unsigned NumElems = VT.getVectorNumElements();
5542 SDValue ImmN;
5543
5544 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector")(static_cast <bool> (Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"
) ? void (0) : __assert_fail ("Mask.empty() && \"getTargetShuffleMask expects an empty Mask vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5544, __extension__ __PRETTY_FUNCTION__))
;
5545 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector")(static_cast <bool> (Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"
) ? void (0) : __assert_fail ("Ops.empty() && \"getTargetShuffleMask expects an empty Ops vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5545, __extension__ __PRETTY_FUNCTION__))
;
5546
5547 IsUnary = false;
5548 bool IsFakeUnary = false;
5549 switch(N->getOpcode()) {
5550 case X86ISD::BLENDI:
5551 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5551, __extension__ __PRETTY_FUNCTION__))
;
5552 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5552, __extension__ __PRETTY_FUNCTION__))
;
5553 ImmN = N->getOperand(N->getNumOperands()-1);
5554 DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5555 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5556 break;
5557 case X86ISD::SHUFP:
5558 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5558, __extension__ __PRETTY_FUNCTION__))
;
5559 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5559, __extension__ __PRETTY_FUNCTION__))
;
5560 ImmN = N->getOperand(N->getNumOperands()-1);
5561 DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5562 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5563 break;
5564 case X86ISD::INSERTPS:
5565 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5565, __extension__ __PRETTY_FUNCTION__))
;
5566 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5566, __extension__ __PRETTY_FUNCTION__))
;
5567 ImmN = N->getOperand(N->getNumOperands()-1);
5568 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5569 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5570 break;
5571 case X86ISD::EXTRQI:
5572 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5572, __extension__ __PRETTY_FUNCTION__))
;
5573 if (isa<ConstantSDNode>(N->getOperand(1)) &&
5574 isa<ConstantSDNode>(N->getOperand(2))) {
5575 int BitLen = N->getConstantOperandVal(1);
5576 int BitIdx = N->getConstantOperandVal(2);
5577 DecodeEXTRQIMask(VT, BitLen, BitIdx, Mask);
5578 IsUnary = true;
5579 }
5580 break;
5581 case X86ISD::INSERTQI:
5582 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5582, __extension__ __PRETTY_FUNCTION__))
;
5583 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5583, __extension__ __PRETTY_FUNCTION__))
;
5584 if (isa<ConstantSDNode>(N->getOperand(2)) &&
5585 isa<ConstantSDNode>(N->getOperand(3))) {
5586 int BitLen = N->getConstantOperandVal(2);
5587 int BitIdx = N->getConstantOperandVal(3);
5588 DecodeINSERTQIMask(VT, BitLen, BitIdx, Mask);
5589 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5590 }
5591 break;
5592 case X86ISD::UNPCKH:
5593 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5593, __extension__ __PRETTY_FUNCTION__))
;
5594 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5594, __extension__ __PRETTY_FUNCTION__))
;
5595 DecodeUNPCKHMask(VT, Mask);
5596 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5597 break;
5598 case X86ISD::UNPCKL:
5599 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5599, __extension__ __PRETTY_FUNCTION__))
;
5600 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5600, __extension__ __PRETTY_FUNCTION__))
;
5601 DecodeUNPCKLMask(VT, Mask);
5602 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5603 break;
5604 case X86ISD::MOVHLPS:
5605 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5605, __extension__ __PRETTY_FUNCTION__))
;
5606 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5606, __extension__ __PRETTY_FUNCTION__))
;
5607 DecodeMOVHLPSMask(NumElems, Mask);
5608 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5609 break;
5610 case X86ISD::MOVLHPS:
5611 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5611, __extension__ __PRETTY_FUNCTION__))
;
5612 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5612, __extension__ __PRETTY_FUNCTION__))
;
5613 DecodeMOVLHPSMask(NumElems, Mask);
5614 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5615 break;
5616 case X86ISD::PALIGNR:
5617 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5617, __extension__ __PRETTY_FUNCTION__))
;
5618 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5618, __extension__ __PRETTY_FUNCTION__))
;
5619 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5619, __extension__ __PRETTY_FUNCTION__))
;
5620 ImmN = N->getOperand(N->getNumOperands()-1);
5621 DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5622 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5623 Ops.push_back(N->getOperand(1));
5624 Ops.push_back(N->getOperand(0));
5625 break;
5626 case X86ISD::VSHLDQ:
5627 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5627, __extension__ __PRETTY_FUNCTION__))
;
5628 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5628, __extension__ __PRETTY_FUNCTION__))
;
5629 ImmN = N->getOperand(N->getNumOperands() - 1);
5630 DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5631 IsUnary = true;
5632 break;
5633 case X86ISD::VSRLDQ:
5634 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5634, __extension__ __PRETTY_FUNCTION__))
;
5635 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5635, __extension__ __PRETTY_FUNCTION__))
;
5636 ImmN = N->getOperand(N->getNumOperands() - 1);
5637 DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5638 IsUnary = true;
5639 break;
5640 case X86ISD::PSHUFD:
5641 case X86ISD::VPERMILPI:
5642 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5642, __extension__ __PRETTY_FUNCTION__))
;
5643 ImmN = N->getOperand(N->getNumOperands()-1);
5644 DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5645 IsUnary = true;
5646 break;
5647 case X86ISD::PSHUFHW:
5648 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5648, __extension__ __PRETTY_FUNCTION__))
;
5649 ImmN = N->getOperand(N->getNumOperands()-1);
5650 DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5651 IsUnary = true;
5652 break;
5653 case X86ISD::PSHUFLW:
5654 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5654, __extension__ __PRETTY_FUNCTION__))
;
5655 ImmN = N->getOperand(N->getNumOperands()-1);
5656 DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5657 IsUnary = true;
5658 break;
5659 case X86ISD::VZEXT_MOVL:
5660 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5660, __extension__ __PRETTY_FUNCTION__))
;
5661 DecodeZeroMoveLowMask(VT, Mask);
5662 IsUnary = true;
5663 break;
5664 case X86ISD::VBROADCAST: {
5665 SDValue N0 = N->getOperand(0);
5666 // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
5667 // add the pre-extracted value to the Ops vector.
5668 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5669 N0.getOperand(0).getValueType() == VT &&
5670 N0.getConstantOperandVal(1) == 0)
5671 Ops.push_back(N0.getOperand(0));
5672
5673 // We only decode broadcasts of same-sized vectors, unless the broadcast
5674 // came from an extract from the original width. If we found one, we
5675 // pushed it the Ops vector above.
5676 if (N0.getValueType() == VT || !Ops.empty()) {
5677 DecodeVectorBroadcast(VT, Mask);
5678 IsUnary = true;
5679 break;
5680 }
5681 return false;
5682 }
5683 case X86ISD::VPERMILPV: {
5684 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5684, __extension__ __PRETTY_FUNCTION__))
;
5685 IsUnary = true;
5686 SDValue MaskNode = N->getOperand(1);
5687 unsigned MaskEltSize = VT.getScalarSizeInBits();
5688 SmallVector<uint64_t, 32> RawMask;
5689 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5690 DecodeVPERMILPMask(VT, RawMask, Mask);
5691 break;
5692 }
5693 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5694 DecodeVPERMILPMask(C, MaskEltSize, Mask);
5695 break;
5696 }
5697 return false;
5698 }
5699 case X86ISD::PSHUFB: {
5700 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")(static_cast <bool> (VT.getScalarType() == MVT::i8 &&
"Byte vector expected") ? void (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5700, __extension__ __PRETTY_FUNCTION__))
;
5701 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5701, __extension__ __PRETTY_FUNCTION__))
;
5702 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5702, __extension__ __PRETTY_FUNCTION__))
;
5703 IsUnary = true;
5704 SDValue MaskNode = N->getOperand(1);
5705 SmallVector<uint64_t, 32> RawMask;
5706 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5707 DecodePSHUFBMask(RawMask, Mask);
5708 break;
5709 }
5710 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5711 DecodePSHUFBMask(C, Mask);
5712 break;
5713 }
5714 return false;
5715 }
5716 case X86ISD::VPERMI:
5717 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5717, __extension__ __PRETTY_FUNCTION__))
;
5718 ImmN = N->getOperand(N->getNumOperands()-1);
5719 DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5720 IsUnary = true;
5721 break;
5722 case X86ISD::MOVSS:
5723 case X86ISD::MOVSD:
5724 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5724, __extension__ __PRETTY_FUNCTION__))
;
5725 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5725, __extension__ __PRETTY_FUNCTION__))
;
5726 DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
5727 break;
5728 case X86ISD::VPERM2X128:
5729 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5729, __extension__ __PRETTY_FUNCTION__))
;
5730 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5730, __extension__ __PRETTY_FUNCTION__))
;
5731 ImmN = N->getOperand(N->getNumOperands()-1);
5732 DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5733 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5734 break;
5735 case X86ISD::MOVSLDUP:
5736 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5736, __extension__ __PRETTY_FUNCTION__))
;
5737 DecodeMOVSLDUPMask(VT, Mask);
5738 IsUnary = true;
5739 break;
5740 case X86ISD::MOVSHDUP:
5741 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5741, __extension__ __PRETTY_FUNCTION__))
;
5742 DecodeMOVSHDUPMask(VT, Mask);
5743 IsUnary = true;
5744 break;
5745 case X86ISD::MOVDDUP:
5746 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5746, __extension__ __PRETTY_FUNCTION__))
;
5747 DecodeMOVDDUPMask(VT, Mask);
5748 IsUnary = true;
5749 break;
5750 case X86ISD::MOVLPD:
5751 case X86ISD::MOVLPS:
5752 // Not yet implemented
5753 return false;
5754 case X86ISD::VPERMIL2: {
5755 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5755, __extension__ __PRETTY_FUNCTION__))
;
5756 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5756, __extension__ __PRETTY_FUNCTION__))
;
5757 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5758 unsigned MaskEltSize = VT.getScalarSizeInBits();
5759 SDValue MaskNode = N->getOperand(2);
5760 SDValue CtrlNode = N->getOperand(3);
5761 if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
5762 unsigned CtrlImm = CtrlOp->getZExtValue();
5763 SmallVector<uint64_t, 32> RawMask;
5764 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5765 DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
5766 break;
5767 }
5768 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5769 DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
5770 break;
5771 }
5772 }
5773 return false;
5774 }
5775 case X86ISD::VPPERM: {
5776 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5776, __extension__ __PRETTY_FUNCTION__))
;
5777 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5777, __extension__ __PRETTY_FUNCTION__))
;
5778 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5779 SDValue MaskNode = N->getOperand(2);
5780 SmallVector<uint64_t, 32> RawMask;
5781 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5782 DecodeVPPERMMask(RawMask, Mask);
5783 break;
5784 }
5785 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5786 DecodeVPPERMMask(C, Mask);
5787 break;
5788 }
5789 return false;
5790 }
5791 case X86ISD::VPERMV: {
5792 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5792, __extension__ __PRETTY_FUNCTION__))
;
5793 IsUnary = true;
5794 // Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
5795 Ops.push_back(N->getOperand(1));
5796 SDValue MaskNode = N->getOperand(0);
5797 SmallVector<uint64_t, 32> RawMask;
5798 unsigned MaskEltSize = VT.getScalarSizeInBits();
5799 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5800 DecodeVPERMVMask(RawMask, Mask);
5801 break;
5802 }
5803 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5804 DecodeVPERMVMask(C, MaskEltSize, Mask);
5805 break;
5806 }
5807 return false;
5808 }
5809 case X86ISD::VPERMV3: {
5810 assert(N->getOperand(0).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(0).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(0).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5810, __extension__ __PRETTY_FUNCTION__))
;
5811 assert(N->getOperand(2).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(2).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(2).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5811, __extension__ __PRETTY_FUNCTION__))
;
5812 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
5813 // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
5814 Ops.push_back(N->getOperand(0));
5815 Ops.push_back(N->getOperand(2));
5816 SDValue MaskNode = N->getOperand(1);
5817 unsigned MaskEltSize = VT.getScalarSizeInBits();
5818 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5819 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5820 break;
5821 }
5822 return false;
5823 }
5824 case X86ISD::VPERMIV3: {
5825 assert(N->getOperand(1).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(1).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(1).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5825, __extension__ __PRETTY_FUNCTION__))
;
5826 assert(N->getOperand(2).getValueType() == VT && "Unexpected value type")(static_cast <bool> (N->getOperand(2).getValueType()
== VT && "Unexpected value type") ? void (0) : __assert_fail
("N->getOperand(2).getValueType() == VT && \"Unexpected value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5826, __extension__ __PRETTY_FUNCTION__))
;
5827 IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
5828 // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
5829 Ops.push_back(N->getOperand(1));
5830 Ops.push_back(N->getOperand(2));
5831 SDValue MaskNode = N->getOperand(0);
5832 unsigned MaskEltSize = VT.getScalarSizeInBits();
5833 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5834 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5835 break;
5836 }
5837 return false;
5838 }
5839 default: llvm_unreachable("unknown target shuffle node")::llvm::llvm_unreachable_internal("unknown target shuffle node"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5839)
;
5840 }
5841
5842 // Empty mask indicates the decode failed.
5843 if (Mask.empty())
5844 return false;
5845
5846 // Check if we're getting a shuffle mask with zero'd elements.
5847 if (!AllowSentinelZero)
5848 if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
5849 return false;
5850
5851 // If we have a fake unary shuffle, the shuffle mask is spread across two
5852 // inputs that are actually the same node. Re-map the mask to always point
5853 // into the first input.
5854 if (IsFakeUnary)
5855 for (int &M : Mask)
5856 if (M >= (int)Mask.size())
5857 M -= Mask.size();
5858
5859 // If we didn't already add operands in the opcode-specific code, default to
5860 // adding 1 or 2 operands starting at 0.
5861 if (Ops.empty()) {
5862 Ops.push_back(N->getOperand(0));
5863 if (!IsUnary || IsFakeUnary)
5864 Ops.push_back(N->getOperand(1));
5865 }
5866
5867 return true;
5868}
5869
5870/// Check a target shuffle mask's inputs to see if we can set any values to
5871/// SM_SentinelZero - this is for elements that are known to be zero
5872/// (not just zeroable) from their inputs.
5873/// Returns true if the target shuffle mask was decoded.
5874static bool setTargetShuffleZeroElements(SDValue N,
5875 SmallVectorImpl<int> &Mask,
5876 SmallVectorImpl<SDValue> &Ops) {
5877 bool IsUnary;
5878 if (!isTargetShuffle(N.getOpcode()))
5879 return false;
5880
5881 MVT VT = N.getSimpleValueType();
5882 if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
5883 return false;
5884
5885 SDValue V1 = Ops[0];
5886 SDValue V2 = IsUnary ? V1 : Ops[1];
5887
5888 V1 = peekThroughBitcasts(V1);
5889 V2 = peekThroughBitcasts(V2);
5890
5891 assert((VT.getSizeInBits() % Mask.size()) == 0 &&(static_cast <bool> ((VT.getSizeInBits() % Mask.size())
== 0 && "Illegal split of shuffle value type") ? void
(0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5892, __extension__ __PRETTY_FUNCTION__))
5892 "Illegal split of shuffle value type")(static_cast <bool> ((VT.getSizeInBits() % Mask.size())
== 0 && "Illegal split of shuffle value type") ? void
(0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5892, __extension__ __PRETTY_FUNCTION__))
;
5893 unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
5894
5895 // Extract known constant input data.
5896 APInt UndefSrcElts[2];
5897 SmallVector<APInt, 32> SrcEltBits[2];
5898 bool IsSrcConstant[2] = {
5899 getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
5900 SrcEltBits[0], true, false),
5901 getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
5902 SrcEltBits[1], true, false)};
5903
5904 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
5905 int M = Mask[i];
5906
5907 // Already decoded as SM_SentinelZero / SM_SentinelUndef.
5908 if (M < 0)
5909 continue;
5910
5911 // Determine shuffle input and normalize the mask.
5912 unsigned SrcIdx = M / Size;
5913 SDValue V = M < Size ? V1 : V2;
5914 M %= Size;
5915
5916 // We are referencing an UNDEF input.
5917 if (V.isUndef()) {
5918 Mask[i] = SM_SentinelUndef;
5919 continue;
5920 }
5921
5922 // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
5923 // TODO: We currently only set UNDEF for integer types - floats use the same
5924 // registers as vectors and many of the scalar folded loads rely on the
5925 // SCALAR_TO_VECTOR pattern.
5926 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5927 (Size % V.getValueType().getVectorNumElements()) == 0) {
5928 int Scale = Size / V.getValueType().getVectorNumElements();
5929 int Idx = M / Scale;
5930 if (Idx != 0 && !VT.isFloatingPoint())
5931 Mask[i] = SM_SentinelUndef;
5932 else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
5933 Mask[i] = SM_SentinelZero;
5934 continue;
5935 }
5936
5937 // Attempt to extract from the source's constant bits.
5938 if (IsSrcConstant[SrcIdx]) {
5939 if (UndefSrcElts[SrcIdx][M])
5940 Mask[i] = SM_SentinelUndef;
5941 else if (SrcEltBits[SrcIdx][M] == 0)
5942 Mask[i] = SM_SentinelZero;
5943 }
5944 }
5945
5946 assert(VT.getVectorNumElements() == Mask.size() &&(static_cast <bool> (VT.getVectorNumElements() == Mask.
size() && "Different mask size from vector size!") ? void
(0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5947, __extension__ __PRETTY_FUNCTION__))
5947 "Different mask size from vector size!")(static_cast <bool> (VT.getVectorNumElements() == Mask.
size() && "Different mask size from vector size!") ? void
(0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5947, __extension__ __PRETTY_FUNCTION__))
;
5948 return true;
5949}
5950
5951// Attempt to decode ops that could be represented as a shuffle mask.
5952// The decoded shuffle mask may contain a different number of elements to the
5953// destination value type.
5954static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
5955 SmallVectorImpl<SDValue> &Ops,
5956 SelectionDAG &DAG) {
5957 Mask.clear();
5958 Ops.clear();
5959
5960 MVT VT = N.getSimpleValueType();
5961 unsigned NumElts = VT.getVectorNumElements();
5962 unsigned NumSizeInBits = VT.getSizeInBits();
5963 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
5964 assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&(static_cast <bool> ((NumBitsPerElt % 8) == 0 &&
(NumSizeInBits % 8) == 0 && "Expected byte aligned value types"
) ? void (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5965, __extension__ __PRETTY_FUNCTION__))
5965 "Expected byte aligned value types")(static_cast <bool> ((NumBitsPerElt % 8) == 0 &&
(NumSizeInBits % 8) == 0 && "Expected byte aligned value types"
) ? void (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 5965, __extension__ __PRETTY_FUNCTION__))
;
5966
5967 unsigned Opcode = N.getOpcode();
5968 switch (Opcode) {
5969 case ISD::AND:
5970 case X86ISD::ANDNP: {
5971 // Attempt to decode as a per-byte mask.
5972 APInt UndefElts;
5973 SmallVector<APInt, 32> EltBits;
5974 SDValue N0 = N.getOperand(0);
5975 SDValue N1 = N.getOperand(1);
5976 bool IsAndN = (X86ISD::ANDNP == Opcode);
5977 uint64_t ZeroMask = IsAndN ? 255 : 0;
5978 if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
5979 return false;
5980 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
5981 if (UndefElts[i]) {
5982 Mask.push_back(SM_SentinelUndef);
5983 continue;
5984 }
5985 uint64_t ByteBits = EltBits[i].getZExtValue();
5986 if (ByteBits != 0 && ByteBits != 255)
5987 return false;
5988 Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
5989 }
5990 Ops.push_back(IsAndN ? N1 : N0);
5991 return true;
5992 }
5993 case ISD::SCALAR_TO_VECTOR: {
5994 // Match against a scalar_to_vector of an extract from a vector,
5995 // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
5996 SDValue N0 = N.getOperand(0);
5997 SDValue SrcExtract;
5998
5999 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6000 N0.getOperand(0).getValueType() == VT) ||
6001 (N0.getOpcode() == X86ISD::PEXTRW &&
6002 N0.getOperand(0).getValueType() == MVT::v8i16) ||
6003 (N0.getOpcode() == X86ISD::PEXTRB &&
6004 N0.getOperand(0).getValueType() == MVT::v16i8)) {
6005 SrcExtract = N0;
6006 }
6007
6008 if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
6009 return false;
6010
6011 SDValue SrcVec = SrcExtract.getOperand(0);
6012 EVT SrcVT = SrcVec.getValueType();
6013 unsigned NumSrcElts = SrcVT.getVectorNumElements();
6014 unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
6015
6016 unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
6017 if (NumSrcElts <= SrcIdx)
6018 return false;
6019
6020 Ops.push_back(SrcVec);
6021 Mask.push_back(SrcIdx);
6022 Mask.append(NumZeros, SM_SentinelZero);
6023 Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
6024 return true;
6025 }
6026 case X86ISD::PINSRB:
6027 case X86ISD::PINSRW: {
6028 SDValue InVec = N.getOperand(0);
6029 SDValue InScl = N.getOperand(1);
6030 uint64_t InIdx = N.getConstantOperandVal(2);
6031 assert(InIdx < NumElts && "Illegal insertion index")(static_cast <bool> (InIdx < NumElts && "Illegal insertion index"
) ? void (0) : __assert_fail ("InIdx < NumElts && \"Illegal insertion index\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6031, __extension__ __PRETTY_FUNCTION__))
;
6032
6033 // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
6034 if (X86::isZeroNode(InScl)) {
6035 Ops.push_back(InVec);
6036 for (unsigned i = 0; i != NumElts; ++i)
6037 Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
6038 return true;
6039 }
6040
6041 // Attempt to recognise a PINSR*(PEXTR*) shuffle pattern.
6042 // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
6043 unsigned ExOp =
6044 (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
6045 if (InScl.getOpcode() != ExOp)
6046 return false;
6047
6048 SDValue ExVec = InScl.getOperand(0);
6049 uint64_t ExIdx = InScl.getConstantOperandVal(1);
6050 assert(ExIdx < NumElts && "Illegal extraction index")(static_cast <bool> (ExIdx < NumElts && "Illegal extraction index"
) ? void (0) : __assert_fail ("ExIdx < NumElts && \"Illegal extraction index\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6050, __extension__ __PRETTY_FUNCTION__))
;
6051 Ops.push_back(InVec);
6052 Ops.push_back(ExVec);
6053 for (unsigned i = 0; i != NumElts; ++i)
6054 Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
6055 return true;
6056 }
6057 case X86ISD::PACKSS:
6058 case X86ISD::PACKUS: {
6059 SDValue N0 = N.getOperand(0);
6060 SDValue N1 = N.getOperand(1);
6061 assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&(static_cast <bool> (N0.getValueType().getVectorNumElements
() == (NumElts / 2) && N1.getValueType().getVectorNumElements
() == (NumElts / 2) && "Unexpected input value type")
? void (0) : __assert_fail ("N0.getValueType().getVectorNumElements() == (NumElts / 2) && N1.getValueType().getVectorNumElements() == (NumElts / 2) && \"Unexpected input value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6063, __extension__ __PRETTY_FUNCTION__))
6062 N1.getValueType().getVectorNumElements() == (NumElts / 2) &&(static_cast <bool> (N0.getValueType().getVectorNumElements
() == (NumElts / 2) && N1.getValueType().getVectorNumElements
() == (NumElts / 2) && "Unexpected input value type")
? void (0) : __assert_fail ("N0.getValueType().getVectorNumElements() == (NumElts / 2) && N1.getValueType().getVectorNumElements() == (NumElts / 2) && \"Unexpected input value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6063, __extension__ __PRETTY_FUNCTION__))
6063 "Unexpected input value type")(static_cast <bool> (N0.getValueType().getVectorNumElements
() == (NumElts / 2) && N1.getValueType().getVectorNumElements
() == (NumElts / 2) && "Unexpected input value type")
? void (0) : __assert_fail ("N0.getValueType().getVectorNumElements() == (NumElts / 2) && N1.getValueType().getVectorNumElements() == (NumElts / 2) && \"Unexpected input value type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6063, __extension__ __PRETTY_FUNCTION__))
;
6064
6065 // If we know input saturation won't happen we can treat this
6066 // as a truncation shuffle.
6067 if (Opcode == X86ISD::PACKSS) {
6068 if ((!N0.isUndef() && DAG.ComputeNumSignBits(N0) <= NumBitsPerElt) ||
6069 (!N1.isUndef() && DAG.ComputeNumSignBits(N1) <= NumBitsPerElt))
6070 return false;
6071 } else {
6072 APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
6073 if ((!N0.isUndef() && !DAG.MaskedValueIsZero(N0, ZeroMask)) ||
6074 (!N1.isUndef() && !DAG.MaskedValueIsZero(N1, ZeroMask)))
6075 return false;
6076 }
6077
6078 bool IsUnary = (N0 == N1);
6079
6080 Ops.push_back(N0);
6081 if (!IsUnary)
6082 Ops.push_back(N1);
6083
6084 createPackShuffleMask(VT, Mask, IsUnary);
6085 return true;
6086 }
6087 case X86ISD::VSHLI:
6088 case X86ISD::VSRLI: {
6089 uint64_t ShiftVal = N.getConstantOperandVal(1);
6090 // Out of range bit shifts are guaranteed to be zero.
6091 if (NumBitsPerElt <= ShiftVal) {
6092 Mask.append(NumElts, SM_SentinelZero);
6093 return true;
6094 }
6095
6096 // We can only decode 'whole byte' bit shifts as shuffles.
6097 if ((ShiftVal % 8) != 0)
6098 break;
6099
6100 uint64_t ByteShift = ShiftVal / 8;
6101 unsigned NumBytes = NumSizeInBits / 8;
6102 unsigned NumBytesPerElt = NumBitsPerElt / 8;
6103 Ops.push_back(N.getOperand(0));
6104
6105 // Clear mask to all zeros and insert the shifted byte indices.
6106 Mask.append(NumBytes, SM_SentinelZero);
6107
6108 if (X86ISD::VSHLI == Opcode) {
6109 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
6110 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6111 Mask[i + j] = i + j - ByteShift;
6112 } else {
6113 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
6114 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
6115 Mask[i + j - ByteShift] = i + j;
6116 }
6117 return true;
6118 }
6119 case ISD::ZERO_EXTEND_VECTOR_INREG:
6120 case X86ISD::VZEXT: {
6121 // TODO - add support for VPMOVZX with smaller input vector types.
6122 SDValue Src = N.getOperand(0);
6123 MVT SrcVT = Src.getSimpleValueType();
6124 if (NumSizeInBits != SrcVT.getSizeInBits())
6125 break;
6126 DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
6127 Ops.push_back(Src);
6128 return true;
6129 }
6130 }
6131
6132 return false;
6133}
6134
6135/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
6136static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
6137 SmallVectorImpl<int> &Mask) {
6138 int MaskWidth = Mask.size();
6139 SmallVector<SDValue, 16> UsedInputs;
6140 for (int i = 0, e = Inputs.size(); i < e; ++i) {
6141 int lo = UsedInputs.size() * MaskWidth;
6142 int hi = lo + MaskWidth;
6143
6144 // Strip UNDEF input usage.
6145 if (Inputs[i].isUndef())
6146 for (int &M : Mask)
6147 if ((lo <= M) && (M < hi))
6148 M = SM_SentinelUndef;
6149
6150 // Check for unused inputs.
6151 if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
6152 UsedInputs.push_back(Inputs[i]);
6153 continue;
6154 }
6155 for (int &M : Mask)
6156 if (lo <= M)
6157 M -= MaskWidth;
6158 }
6159 Inputs = UsedInputs;
6160}
6161
6162/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
6163/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
6164/// remaining input indices in case we now have a unary shuffle and adjust the
6165/// inputs accordingly.
6166/// Returns true if the target shuffle mask was decoded.
6167static bool resolveTargetShuffleInputs(SDValue Op,
6168 SmallVectorImpl<SDValue> &Inputs,
6169 SmallVectorImpl<int> &Mask,
6170 SelectionDAG &DAG) {
6171 if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
6172 if (!getFauxShuffleMask(Op, Mask, Inputs, DAG))
6173 return false;
6174
6175 resolveTargetShuffleInputsAndMask(Inputs, Mask);
6176 return true;
6177}
6178
6179/// Returns the scalar element that will make up the ith
6180/// element of the result of the vector shuffle.
6181static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
6182 unsigned Depth) {
6183 if (Depth == 6)
6184 return SDValue(); // Limit search depth.
6185
6186 SDValue V = SDValue(N, 0);
6187 EVT VT = V.getValueType();
6188 unsigned Opcode = V.getOpcode();
6189
6190 // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
6191 if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
6192 int Elt = SV->getMaskElt(Index);
6193
6194 if (Elt < 0)
6195 return DAG.getUNDEF(VT.getVectorElementType());
6196
6197 unsigned NumElems = VT.getVectorNumElements();
6198 SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
6199 : SV->getOperand(1);
6200 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
6201 }
6202
6203 // Recurse into target specific vector shuffles to find scalars.
6204 if (isTargetShuffle(Opcode)) {
6205 MVT ShufVT = V.getSimpleValueType();
6206 MVT ShufSVT = ShufVT.getVectorElementType();
6207 int NumElems = (int)ShufVT.getVectorNumElements();
6208 SmallVector<int, 16> ShuffleMask;
6209 SmallVector<SDValue, 16> ShuffleOps;
6210 bool IsUnary;
6211
6212 if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
6213 return SDValue();
6214
6215 int Elt = ShuffleMask[Index];
6216 if (Elt == SM_SentinelZero)
6217 return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
6218 : DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
6219 if (Elt == SM_SentinelUndef)
6220 return DAG.getUNDEF(ShufSVT);
6221
6222 assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range")(static_cast <bool> (0 <= Elt && Elt < (2
*NumElems) && "Shuffle index out of range") ? void (0
) : __assert_fail ("0 <= Elt && Elt < (2*NumElems) && \"Shuffle index out of range\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6222, __extension__ __PRETTY_FUNCTION__))
;
6223 SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
6224 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
6225 Depth+1);
6226 }
6227
6228 // Actual nodes that may contain scalar elements
6229 if (Opcode == ISD::BITCAST) {
6230 V = V.getOperand(0);
6231 EVT SrcVT = V.getValueType();
6232 unsigned NumElems = VT.getVectorNumElements();
6233
6234 if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
6235 return SDValue();
6236 }
6237
6238 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
6239 return (Index == 0) ? V.getOperand(0)
6240 : DAG.getUNDEF(VT.getVectorElementType());
6241
6242 if (V.getOpcode() == ISD::BUILD_VECTOR)
6243 return V.getOperand(Index);
6244
6245 return SDValue();
6246}
6247
6248// Use PINSRB/PINSRW/PINSRD to create a build vector.
6249static SDValue LowerBuildVectorAsInsert(SDValue Op, unsigned NonZeros,
6250 unsigned NumNonZero, unsigned NumZero,
6251 SelectionDAG &DAG,
6252 const X86Subtarget &Subtarget) {
6253 MVT VT = Op.getSimpleValueType();
6254 unsigned NumElts = VT.getVectorNumElements();
6255 assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) ||(static_cast <bool> (((VT == MVT::v8i16 && Subtarget
.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) &&
Subtarget.hasSSE41())) && "Illegal vector insertion"
) ? void (0) : __assert_fail ("((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && \"Illegal vector insertion\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6257, __extension__ __PRETTY_FUNCTION__))
6256 ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) &&(static_cast <bool> (((VT == MVT::v8i16 && Subtarget
.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) &&
Subtarget.hasSSE41())) && "Illegal vector insertion"
) ? void (0) : __assert_fail ("((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && \"Illegal vector insertion\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6257, __extension__ __PRETTY_FUNCTION__))
6257 "Illegal vector insertion")(static_cast <bool> (((VT == MVT::v8i16 && Subtarget
.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) &&
Subtarget.hasSSE41())) && "Illegal vector insertion"
) ? void (0) : __assert_fail ("((VT == MVT::v8i16 && Subtarget.hasSSE2()) || ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && \"Illegal vector insertion\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6257, __extension__ __PRETTY_FUNCTION__))
;
6258
6259 SDLoc dl(Op);
6260 SDValue V;
6261 bool First = true;
6262
6263 for (unsigned i = 0; i < NumElts; ++i) {
6264 bool IsNonZero = (NonZeros & (1 << i)) != 0;
6265 if (!IsNonZero)
6266 continue;
6267
6268 // If the build vector contains zeros or our first insertion is not the
6269 // first index then insert into zero vector to break any register
6270 // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
6271 if (First) {
6272 First = false;
6273 if (NumZero || 0 != i)
6274 V = getZeroVector(VT, Subtarget, DAG, dl);
6275 else {
6276 assert(0 == i && "Expected insertion into zero-index")(static_cast <bool> (0 == i && "Expected insertion into zero-index"
) ? void (0) : __assert_fail ("0 == i && \"Expected insertion into zero-index\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6276, __extension__ __PRETTY_FUNCTION__))
;
6277 V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
6278 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6279 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6280 V = DAG.getBitcast(VT, V);
6281 continue;
6282 }
6283 }
6284 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i),
6285 DAG.getIntPtrConstant(i, dl));
6286 }
6287
6288 return V;
6289}
6290
6291/// Custom lower build_vector of v16i8.
6292static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
6293 unsigned NumNonZero, unsigned NumZero,
6294 SelectionDAG &DAG,
6295 const X86Subtarget &Subtarget) {
6296 if (NumNonZero > 8 && !Subtarget.hasSSE41())
6297 return SDValue();
6298
6299 // SSE4.1 - use PINSRB to insert each byte directly.
6300 if (Subtarget.hasSSE41())
6301 return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
6302 Subtarget);
6303
6304 SDLoc dl(Op);
6305 SDValue V;
6306 bool First = true;
6307
6308 // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
6309 for (unsigned i = 0; i < 16; ++i) {
6310 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
6311 if (ThisIsNonZero && First) {
6312 if (NumZero)
6313 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
6314 else
6315 V = DAG.getUNDEF(MVT::v8i16);
6316 First = false;
6317 }
6318
6319 if ((i & 1) != 0) {
6320 // FIXME: Investigate extending to i32 instead of just i16.
6321 // FIXME: Investigate combining the first 4 bytes as a i32 instead.
6322 SDValue ThisElt, LastElt;
6323 bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
6324 if (LastIsNonZero) {
6325 LastElt =
6326 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
6327 }
6328 if (ThisIsNonZero) {
6329 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
6330 ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
6331 DAG.getConstant(8, dl, MVT::i8));
6332 if (LastIsNonZero)
6333 ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
6334 } else
6335 ThisElt = LastElt;
6336
6337 if (ThisElt) {
6338 if (1 == i) {
6339 V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
6340 : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
6341 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6342 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6343 V = DAG.getBitcast(MVT::v8i16, V);
6344 } else {
6345 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
6346 DAG.getIntPtrConstant(i / 2, dl));
6347 }
6348 }
6349 }
6350 }
6351
6352 return DAG.getBitcast(MVT::v16i8, V);
6353}
6354
6355/// Custom lower build_vector of v8i16.
6356static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
6357 unsigned NumNonZero, unsigned NumZero,
6358 SelectionDAG &DAG,
6359 const X86Subtarget &Subtarget) {
6360 if (NumNonZero > 4 && !Subtarget.hasSSE41())
6361 return SDValue();
6362
6363 // Use PINSRW to insert each byte directly.
6364 return LowerBuildVectorAsInsert(Op, NonZeros, NumNonZero, NumZero, DAG,
6365 Subtarget);
6366}
6367
6368/// Custom lower build_vector of v4i32 or v4f32.
6369static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
6370 const X86Subtarget &Subtarget) {
6371 // Find all zeroable elements.
6372 std::bitset<4> Zeroable;
6373 for (int i=0; i < 4; ++i) {
6374 SDValue Elt = Op->getOperand(i);
6375 Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
6376 }
6377 assert(Zeroable.size() - Zeroable.count() > 1 &&(static_cast <bool> (Zeroable.size() - Zeroable.count()
> 1 && "We expect at least two non-zero elements!"
) ? void (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6378, __extension__ __PRETTY_FUNCTION__))
6378 "We expect at least two non-zero elements!")(static_cast <bool> (Zeroable.size() - Zeroable.count()
> 1 && "We expect at least two non-zero elements!"
) ? void (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6378, __extension__ __PRETTY_FUNCTION__))
;
6379
6380 // We only know how to deal with build_vector nodes where elements are either
6381 // zeroable or extract_vector_elt with constant index.
6382 SDValue FirstNonZero;
6383 unsigned FirstNonZeroIdx;
6384 for (unsigned i=0; i < 4; ++i) {
6385 if (Zeroable[i])
6386 continue;
6387 SDValue Elt = Op->getOperand(i);
6388 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6389 !isa<ConstantSDNode>(Elt.getOperand(1)))
6390 return SDValue();
6391 // Make sure that this node is extracting from a 128-bit vector.
6392 MVT VT = Elt.getOperand(0).getSimpleValueType();
6393 if (!VT.is128BitVector())
6394 return SDValue();
6395 if (!FirstNonZero.getNode()) {
6396 FirstNonZero = Elt;
6397 FirstNonZeroIdx = i;
6398 }
6399 }
6400
6401 assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!")(static_cast <bool> (FirstNonZero.getNode() && "Unexpected build vector of all zeros!"
) ? void (0) : __assert_fail ("FirstNonZero.getNode() && \"Unexpected build vector of all zeros!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6401, __extension__ __PRETTY_FUNCTION__))
;
6402 SDValue V1 = FirstNonZero.getOperand(0);
6403 MVT VT = V1.getSimpleValueType();
6404
6405 // See if this build_vector can be lowered as a blend with zero.
6406 SDValue Elt;
6407 unsigned EltMaskIdx, EltIdx;
6408 int Mask[4];
6409 for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
6410 if (Zeroable[EltIdx]) {
6411 // The zero vector will be on the right hand side.
6412 Mask[EltIdx] = EltIdx+4;
6413 continue;
6414 }
6415
6416 Elt = Op->getOperand(EltIdx);
6417 // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index.
6418 EltMaskIdx = Elt.getConstantOperandVal(1);
6419 if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
6420 break;
6421 Mask[EltIdx] = EltIdx;
6422 }
6423
6424 if (EltIdx == 4) {
6425 // Let the shuffle legalizer deal with blend operations.
6426 SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
6427 if (V1.getSimpleValueType() != VT)
6428 V1 = DAG.getBitcast(VT, V1);
6429 return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
6430 }
6431
6432 // See if we can lower this build_vector to a INSERTPS.
6433 if (!Subtarget.hasSSE41())
6434 return SDValue();
6435
6436 SDValue V2 = Elt.getOperand(0);
6437 if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
6438 V1 = SDValue();
6439
6440 bool CanFold = true;
6441 for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
6442 if (Zeroable[i])
6443 continue;
6444
6445 SDValue Current = Op->getOperand(i);
6446 SDValue SrcVector = Current->getOperand(0);
6447 if (!V1.getNode())
6448 V1 = SrcVector;
6449 CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
6450 }
6451
6452 if (!CanFold)
6453 return SDValue();
6454
6455 assert(V1.getNode() && "Expected at least two non-zero elements!")(static_cast <bool> (V1.getNode() && "Expected at least two non-zero elements!"
) ? void (0) : __assert_fail ("V1.getNode() && \"Expected at least two non-zero elements!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6455, __extension__ __PRETTY_FUNCTION__))
;
6456 if (V1.getSimpleValueType() != MVT::v4f32)
6457 V1 = DAG.getBitcast(MVT::v4f32, V1);
6458 if (V2.getSimpleValueType() != MVT::v4f32)
6459 V2 = DAG.getBitcast(MVT::v4f32, V2);
6460
6461 // Ok, we can emit an INSERTPS instruction.
6462 unsigned ZMask = Zeroable.to_ulong();
6463
6464 unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
6465 assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!")(static_cast <bool> ((InsertPSMask & ~0xFFu) == 0 &&
"Invalid mask!") ? void (0) : __assert_fail ("(InsertPSMask & ~0xFFu) == 0 && \"Invalid mask!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6465, __extension__ __PRETTY_FUNCTION__))
;
6466 SDLoc DL(Op);
6467 SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
6468 DAG.getIntPtrConstant(InsertPSMask, DL));
6469 return DAG.getBitcast(VT, Result);
6470}
6471
6472/// Return a vector logical shift node.
6473static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
6474 SelectionDAG &DAG, const TargetLowering &TLI,
6475 const SDLoc &dl) {
6476 assert(VT.is128BitVector() && "Unknown type for VShift")(static_cast <bool> (VT.is128BitVector() && "Unknown type for VShift"
) ? void (0) : __assert_fail ("VT.is128BitVector() && \"Unknown type for VShift\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6476, __extension__ __PRETTY_FUNCTION__))
;
6477 MVT ShVT = MVT::v16i8;
6478 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
6479 SrcOp = DAG.getBitcast(ShVT, SrcOp);
6480 MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
6481 assert(NumBits % 8 == 0 && "Only support byte sized shifts")(static_cast <bool> (NumBits % 8 == 0 && "Only support byte sized shifts"
) ? void (0) : __assert_fail ("NumBits % 8 == 0 && \"Only support byte sized shifts\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6481, __extension__ __PRETTY_FUNCTION__))
;
6482 SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
6483 return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
6484}
6485
6486static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
6487 SelectionDAG &DAG) {
6488
6489 // Check if the scalar load can be widened into a vector load. And if
6490 // the address is "base + cst" see if the cst can be "absorbed" into
6491 // the shuffle mask.
6492 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
6493 SDValue Ptr = LD->getBasePtr();
6494 if (!ISD::isNormalLoad(LD) || LD->isVolatile())
6495 return SDValue();
6496 EVT PVT = LD->getValueType(0);
6497 if (PVT != MVT::i32 && PVT != MVT::f32)
6498 return SDValue();
6499
6500 int FI = -1;
6501 int64_t Offset = 0;
6502 if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
6503 FI = FINode->getIndex();
6504 Offset = 0;
6505 } else if (DAG.isBaseWithConstantOffset(Ptr) &&
6506 isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
6507 FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
6508 Offset = Ptr.getConstantOperandVal(1);
6509 Ptr = Ptr.getOperand(0);
6510 } else {
6511 return SDValue();
6512 }
6513
6514 // FIXME: 256-bit vector instructions don't require a strict alignment,
6515 // improve this code to support it better.
6516 unsigned RequiredAlign = VT.getSizeInBits()/8;
6517 SDValue Chain = LD->getChain();
6518 // Make sure the stack object alignment is at least 16 or 32.
6519 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6520 if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
6521 if (MFI.isFixedObjectIndex(FI)) {
6522 // Can't change the alignment. FIXME: It's possible to compute
6523 // the exact stack offset and reference FI + adjust offset instead.
6524 // If someone *really* cares about this. That's the way to implement it.
6525 return SDValue();
6526 } else {
6527 MFI.setObjectAlignment(FI, RequiredAlign);
6528 }
6529 }
6530
6531 // (Offset % 16 or 32) must be multiple of 4. Then address is then
6532 // Ptr + (Offset & ~15).
6533 if (Offset < 0)
6534 return SDValue();
6535 if ((Offset % RequiredAlign) & 3)
6536 return SDValue();
6537 int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
6538 if (StartOffset) {
6539 SDLoc DL(Ptr);
6540 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6541 DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
6542 }
6543
6544 int EltNo = (Offset - StartOffset) >> 2;
6545 unsigned NumElems = VT.getVectorNumElements();
6546
6547 EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
6548 SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
6549 LD->getPointerInfo().getWithOffset(StartOffset));
6550
6551 SmallVector<int, 8> Mask(NumElems, EltNo);
6552
6553 return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
6554 }
6555
6556 return SDValue();
6557}
6558
6559/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
6560/// elements can be replaced by a single large load which has the same value as
6561/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
6562///
6563/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
6564static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
6565 const SDLoc &DL, SelectionDAG &DAG,
6566 const X86Subtarget &Subtarget,
6567 bool isAfterLegalize) {
6568 unsigned NumElems = Elts.size();
6569
6570 int LastLoadedElt = -1;
6571 SmallBitVector LoadMask(NumElems, false);
6572 SmallBitVector ZeroMask(NumElems, false);
6573 SmallBitVector UndefMask(NumElems, false);
6
Calling constructor for 'SmallBitVector'
9
Returning from constructor for 'SmallBitVector'
6574
6575 // For each element in the initializer, see if we've found a load, zero or an
6576 // undef.
6577 for (unsigned i = 0; i < NumElems; ++i) {
10
Loop condition is true. Entering loop body
6578 SDValue Elt = peekThroughBitcasts(Elts[i]);
6579 if (!Elt.getNode())
11
Assuming the condition is false
12
Taking false branch
6580 return SDValue();
6581
6582 if (Elt.isUndef())
13
Taking false branch
6583 UndefMask[i] = true;
6584 else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
14
Assuming the condition is false
15
Assuming the condition is false
16
Taking false branch
6585 ZeroMask[i] = true;
6586 else if (ISD::isNON_EXTLoad(Elt.getNode())) {
17
Taking false branch
6587 LoadMask[i] = true;
6588 LastLoadedElt = i;
6589 // Each loaded element must be the correct fractional portion of the
6590 // requested vector load.
6591 if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
6592 return SDValue();
6593 } else
6594 return SDValue();
6595 }
6596 assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&(static_cast <bool> ((ZeroMask | UndefMask | LoadMask).
count() == NumElems && "Incomplete element masks") ? void
(0) : __assert_fail ("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6597, __extension__ __PRETTY_FUNCTION__))
6597 "Incomplete element masks")(static_cast <bool> ((ZeroMask | UndefMask | LoadMask).
count() == NumElems && "Incomplete element masks") ? void
(0) : __assert_fail ("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6597, __extension__ __PRETTY_FUNCTION__))
;
6598
6599 // Handle Special Cases - all undef or undef/zero.
6600 if (UndefMask.count() == NumElems)
6601 return DAG.getUNDEF(VT);
6602
6603 // FIXME: Should we return this as a BUILD_VECTOR instead?
6604 if ((ZeroMask | UndefMask).count() == NumElems)
6605 return VT.isInteger() ? DAG.getConstant(0, DL, VT)
6606 : DAG.getConstantFP(0.0, DL, VT);
6607
6608 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6609 int FirstLoadedElt = LoadMask.find_first();
6610 SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
6611 LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
6612 EVT LDBaseVT = EltBase.getValueType();
6613
6614 // Consecutive loads can contain UNDEFS but not ZERO elements.
6615 // Consecutive loads with UNDEFs and ZEROs elements require a
6616 // an additional shuffle stage to clear the ZERO elements.
6617 bool IsConsecutiveLoad = true;
6618 bool IsConsecutiveLoadWithZeros = true;
6619 for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
6620 if (LoadMask[i]) {
6621 SDValue Elt = peekThroughBitcasts(Elts[i]);
6622 LoadSDNode *LD = cast<LoadSDNode>(Elt);
6623 if (!DAG.areNonVolatileConsecutiveLoads(
6624 LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
6625 i - FirstLoadedElt)) {
6626 IsConsecutiveLoad = false;
6627 IsConsecutiveLoadWithZeros = false;
6628 break;
6629 }
6630 } else if (ZeroMask[i]) {
6631 IsConsecutiveLoad = false;
6632 }
6633 }
6634
6635 SmallVector<LoadSDNode *, 8> Loads;
6636 for (int i = FirstLoadedElt; i <= LastLoadedElt; ++i)
6637 if (LoadMask[i])
6638 Loads.push_back(cast<LoadSDNode>(peekThroughBitcasts(Elts[i])));
6639
6640 auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) {
6641 auto MMOFlags = LDBase->getMemOperand()->getFlags();
6642 assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&(static_cast <bool> (!(MMOFlags & MachineMemOperand
::MOVolatile) && "Cannot merge volatile loads.") ? void
(0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6643, __extension__ __PRETTY_FUNCTION__))
6643 "Cannot merge volatile loads.")(static_cast <bool> (!(MMOFlags & MachineMemOperand
::MOVolatile) && "Cannot merge volatile loads.") ? void
(0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6643, __extension__ __PRETTY_FUNCTION__))
;
6644 SDValue NewLd =
6645 DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
6646 LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
6647 for (auto *LD : Loads)
6648 DAG.makeEquivalentMemoryOrdering(LD, NewLd);
6649 return NewLd;
6650 };
6651
6652 // LOAD - all consecutive load/undefs (must start/end with a load).
6653 // If we have found an entire vector of loads and undefs, then return a large
6654 // load of the entire vector width starting at the base pointer.
6655 // If the vector contains zeros, then attempt to shuffle those elements.
6656 if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
6657 (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
6658 assert(LDBase && "Did not find base load for merging consecutive loads")(static_cast <bool> (LDBase && "Did not find base load for merging consecutive loads"
) ? void (0) : __assert_fail ("LDBase && \"Did not find base load for merging consecutive loads\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6658, __extension__ __PRETTY_FUNCTION__))
;
6659 EVT EltVT = LDBase->getValueType(0);
6660 // Ensure that the input vector size for the merged loads matches the
6661 // cumulative size of the input elements.
6662 if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
6663 return SDValue();
6664
6665 if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
6666 return SDValue();
6667
6668 // Don't create 256-bit non-temporal aligned loads without AVX2 as these
6669 // will lower to regular temporal loads and use the cache.
6670 if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 &&
6671 VT.is256BitVector() && !Subtarget.hasInt256())
6672 return SDValue();
6673
6674 if (IsConsecutiveLoad)
6675 return CreateLoad(VT, LDBase);
6676
6677 // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
6678 // vector and a zero vector to clear out the zero elements.
6679 if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
6680 SmallVector<int, 4> ClearMask(NumElems, -1);
6681 for (unsigned i = 0; i < NumElems; ++i) {
6682 if (ZeroMask[i])
6683 ClearMask[i] = i + NumElems;
6684 else if (LoadMask[i])
6685 ClearMask[i] = i;
6686 }
6687 SDValue V = CreateLoad(VT, LDBase);
6688 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
6689 : DAG.getConstantFP(0.0, DL, VT);
6690 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
6691 }
6692 }
6693
6694 int LoadSize =
6695 (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
6696
6697 // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
6698 if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
6699 (LoadSize == 32 || LoadSize == 64) &&
6700 ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
6701 MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
6702 : MVT::getIntegerVT(LoadSize);
6703 MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
6704 if (TLI.isTypeLegal(VecVT)) {
6705 SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
6706 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
6707 SDValue ResNode =
6708 DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
6709 LDBase->getPointerInfo(),
6710 LDBase->getAlignment(),
6711 MachineMemOperand::MOLoad);
6712 for (auto *LD : Loads)
6713 DAG.makeEquivalentMemoryOrdering(LD, ResNode);
6714 return DAG.getBitcast(VT, ResNode);
6715 }
6716 }
6717
6718 return SDValue();
6719}
18
Potential leak of memory pointed to by 'UndefMask.X'
6720
6721static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
6722 unsigned SplatBitSize, LLVMContext &C) {
6723 unsigned ScalarSize = VT.getScalarSizeInBits();
6724 unsigned NumElm = SplatBitSize / ScalarSize;
6725
6726 SmallVector<Constant *, 32> ConstantVec;
6727 for (unsigned i = 0; i < NumElm; i++) {
6728 APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
6729 Constant *Const;
6730 if (VT.isFloatingPoint()) {
6731 if (ScalarSize == 32) {
6732 Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
6733 } else {
6734 assert(ScalarSize == 64 && "Unsupported floating point scalar size")(static_cast <bool> (ScalarSize == 64 && "Unsupported floating point scalar size"
) ? void (0) : __assert_fail ("ScalarSize == 64 && \"Unsupported floating point scalar size\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6734, __extension__ __PRETTY_FUNCTION__))
;
6735 Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
6736 }
6737 } else
6738 Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
6739 ConstantVec.push_back(Const);
6740 }
6741 return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
6742}
6743
6744static bool isUseOfShuffle(SDNode *N) {
6745 for (auto *U : N->uses()) {
6746 if (isTargetShuffle(U->getOpcode()))
6747 return true;
6748 if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
6749 return isUseOfShuffle(U);
6750 }
6751 return false;
6752}
6753
6754// Check if the current node of build vector is a zero extended vector.
6755// // If so, return the value extended.
6756// // For example: (0,0,0,a,0,0,0,a,0,0,0,a,0,0,0,a) returns a.
6757// // NumElt - return the number of zero extended identical values.
6758// // EltType - return the type of the value include the zero extend.
6759static SDValue isSplatZeroExtended(const BuildVectorSDNode *Op,
6760 unsigned &NumElt, MVT &EltType) {
6761 SDValue ExtValue = Op->getOperand(0);
6762 unsigned NumElts = Op->getNumOperands();
6763 unsigned Delta = NumElts;
6764
6765 for (unsigned i = 1; i < NumElts; i++) {
6766 if (Op->getOperand(i) == ExtValue) {
6767 Delta = i;
6768 break;
6769 }
6770 if (!(Op->getOperand(i).isUndef() || isNullConstant(Op->getOperand(i))))
6771 return SDValue();
6772 }
6773 if (!isPowerOf2_32(Delta) || Delta == 1)
6774 return SDValue();
6775
6776 for (unsigned i = Delta; i < NumElts; i++) {
6777 if (i % Delta == 0) {
6778 if (Op->getOperand(i) != ExtValue)
6779 return SDValue();
6780 } else if (!(isNullConstant(Op->getOperand(i)) ||
6781 Op->getOperand(i).isUndef()))
6782 return SDValue();
6783 }
6784 unsigned EltSize = Op->getSimpleValueType(0).getScalarSizeInBits();
6785 unsigned ExtVTSize = EltSize * Delta;
6786 EltType = MVT::getIntegerVT(ExtVTSize);
6787 NumElt = NumElts / Delta;
6788 return ExtValue;
6789}
6790
6791/// Attempt to use the vbroadcast instruction to generate a splat value
6792/// from a splat BUILD_VECTOR which uses:
6793/// a. A single scalar load, or a constant.
6794/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
6795///
6796/// The VBROADCAST node is returned when a pattern is found,
6797/// or SDValue() otherwise.
6798static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
6799 const X86Subtarget &Subtarget,
6800 SelectionDAG &DAG) {
6801 // VBROADCAST requires AVX.
6802 // TODO: Splats could be generated for non-AVX CPUs using SSE
6803 // instructions, but there's less potential gain for only 128-bit vectors.
6804 if (!Subtarget.hasAVX())
6805 return SDValue();
6806
6807 MVT VT = BVOp->getSimpleValueType(0);
6808 SDLoc dl(BVOp);
6809
6810 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Unsupported vector type for broadcast."
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6811, __extension__ __PRETTY_FUNCTION__))
6811 "Unsupported vector type for broadcast.")(static_cast <bool> ((VT.is128BitVector() || VT.is256BitVector
() || VT.is512BitVector()) && "Unsupported vector type for broadcast."
) ? void (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6811, __extension__ __PRETTY_FUNCTION__))
;
6812
6813 BitVector UndefElements;
6814 SDValue Ld = BVOp->getSplatValue(&UndefElements);
6815
6816 // Attempt to use VBROADCASTM
6817 // From this paterrn:
6818 // a. t0 = (zext_i64 (bitcast_i8 v2i1 X))
6819 // b. t1 = (build_vector t0 t0)
6820 //
6821 // Create (VBROADCASTM v2i1 X)
6822 if (Subtarget.hasCDI() && (VT.is512BitVector() || Subtarget.hasVLX())) {
6823 MVT EltType = VT.getScalarType();
6824 unsigned NumElts = VT.getVectorNumElements();
6825 SDValue BOperand;
6826 SDValue ZeroExtended = isSplatZeroExtended(BVOp, NumElts, EltType);
6827 if ((ZeroExtended && ZeroExtended.getOpcode() == ISD::BITCAST) ||
6828 (Ld && Ld.getOpcode() == ISD::ZERO_EXTEND &&
6829 Ld.getOperand(0).getOpcode() == ISD::BITCAST)) {
6830 if (ZeroExtended)
6831 BOperand = ZeroExtended.getOperand(0);
6832 else
6833 BOperand = Ld.getOperand(0).getOperand(0);
6834 if (BOperand.getValueType().isVector() &&
6835 BOperand.getSimpleValueType().getVectorElementType() == MVT::i1) {
6836 if ((EltType == MVT::i64 && (VT.getVectorElementType() == MVT::i8 ||
6837 NumElts == 8)) || // for broadcastmb2q
6838 (EltType == MVT::i32 && (VT.getVectorElementType() == MVT::i16 ||
6839 NumElts == 16))) { // for broadcastmw2d
6840 SDValue Brdcst =
6841 DAG.getNode(X86ISD::VBROADCASTM, dl,
6842 MVT::getVectorVT(EltType, NumElts), BOperand);
6843 return DAG.getBitcast(VT, Brdcst);
6844 }
6845 }
6846 }
6847 }
6848
6849 // We need a splat of a single value to use broadcast, and it doesn't
6850 // make any sense if the value is only in one element of the vector.
6851 if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
6852 APInt SplatValue, Undef;
6853 unsigned SplatBitSize;
6854 bool HasUndef;
6855 // Check if this is a repeated constant pattern suitable for broadcasting.
6856 if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
6857 SplatBitSize > VT.getScalarSizeInBits() &&
6858 SplatBitSize < VT.getSizeInBits()) {
6859 // Avoid replacing with broadcast when it's a use of a shuffle
6860 // instruction to preserve the present custom lowering of shuffles.
6861 if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
6862 return SDValue();
6863 // replace BUILD_VECTOR with broadcast of the repeated constants.
6864 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6865 LLVMContext *Ctx = DAG.getContext();
6866 MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
6867 if (Subtarget.hasAVX()) {
6868 if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
6869 !(SplatBitSize == 64 && Subtarget.is32Bit())) {
6870 // Splatted value can fit in one INTEGER constant in constant pool.
6871 // Load the constant and broadcast it.
6872 MVT CVT = MVT::getIntegerVT(SplatBitSize);
6873 Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
6874 Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
6875 SDValue CP = DAG.getConstantPool(C, PVT);
6876 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6877
6878 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6879 Ld = DAG.getLoad(
6880 CVT, dl, DAG.getEntryNode(), CP,
6881 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6882 Alignment);
6883 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6884 MVT::getVectorVT(CVT, Repeat), Ld);
6885 return DAG.getBitcast(VT, Brdcst);
6886 } else if (SplatBitSize == 32 || SplatBitSize == 64) {
6887 // Splatted value can fit in one FLOAT constant in constant pool.
6888 // Load the constant and broadcast it.
6889 // AVX have support for 32 and 64 bit broadcast for floats only.
6890 // No 64bit integer in 32bit subtarget.
6891 MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
6892 // Lower the splat via APFloat directly, to avoid any conversion.
6893 Constant *C =
6894 SplatBitSize == 32
6895 ? ConstantFP::get(*Ctx,
6896 APFloat(APFloat::IEEEsingle(), SplatValue))
6897 : ConstantFP::get(*Ctx,
6898 APFloat(APFloat::IEEEdouble(), SplatValue));
6899 SDValue CP = DAG.getConstantPool(C, PVT);
6900 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6901
6902 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6903 Ld = DAG.getLoad(
6904 CVT, dl, DAG.getEntryNode(), CP,
6905 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6906 Alignment);
6907 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6908 MVT::getVectorVT(CVT, Repeat), Ld);
6909 return DAG.getBitcast(VT, Brdcst);
6910 } else if (SplatBitSize > 64) {
6911 // Load the vector of constants and broadcast it.
6912 MVT CVT = VT.getScalarType();
6913 Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
6914 *Ctx);
6915 SDValue VCP = DAG.getConstantPool(VecC, PVT);
6916 unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
6917 unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
6918 Ld = DAG.getLoad(
6919 MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
6920 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6921 Alignment);
6922 SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
6923 return DAG.getBitcast(VT, Brdcst);
6924 }
6925 }
6926 }
6927 return SDValue();
6928 }
6929
6930 bool ConstSplatVal =
6931 (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
6932
6933 // Make sure that all of the users of a non-constant load are from the
6934 // BUILD_VECTOR node.
6935 if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
6936 return SDValue();
6937
6938 unsigned ScalarSize = Ld.getValueSizeInBits();
6939 bool IsGE256 = (VT.getSizeInBits() >= 256);
6940
6941 // When optimizing for size, generate up to 5 extra bytes for a broadcast
6942 // instruction to save 8 or more bytes of constant pool data.
6943 // TODO: If multiple splats are generated to load the same constant,
6944 // it may be detrimental to overall size. There needs to be a way to detect
6945 // that condition to know if this is truly a size win.
6946 bool OptForSize = DAG.getMachineFunction().getFunction().optForSize();
6947
6948 // Handle broadcasting a single constant scalar from the constant pool
6949 // into a vector.
6950 // On Sandybridge (no AVX2), it is still better to load a constant vector
6951 // from the constant pool and not to broadcast it from a scalar.
6952 // But override that restriction when optimizing for size.
6953 // TODO: Check if splatting is recommended for other AVX-capable CPUs.
6954 if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
6955 EVT CVT = Ld.getValueType();
6956 assert(!CVT.isVector() && "Must not broadcast a vector type")(static_cast <bool> (!CVT.isVector() && "Must not broadcast a vector type"
) ? void (0) : __assert_fail ("!CVT.isVector() && \"Must not broadcast a vector type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6956, __extension__ __PRETTY_FUNCTION__))
;
6957
6958 // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
6959 // For size optimization, also splat v2f64 and v2i64, and for size opt
6960 // with AVX2, also splat i8 and i16.
6961 // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
6962 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6963 (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
6964 const Constant *C = nullptr;
6965 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
6966 C = CI->getConstantIntValue();
6967 else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
6968 C = CF->getConstantFPValue();
6969
6970 assert(C && "Invalid constant type")(static_cast <bool> (C && "Invalid constant type"
) ? void (0) : __assert_fail ("C && \"Invalid constant type\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 6970, __extension__ __PRETTY_FUNCTION__))
;
6971
6972 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6973 SDValue CP =
6974 DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
6975 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6976 Ld = DAG.getLoad(
6977 CVT, dl, DAG.getEntryNode(), CP,
6978 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6979 Alignment);
6980
6981 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6982 }
6983 }
6984
6985 bool IsLoad = ISD::isNormalLoad(Ld.getNode());
6986
6987 // Handle AVX2 in-register broadcasts.
6988 if (!IsLoad && Subtarget.hasInt256() &&
6989 (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
6990 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6991
6992 // The scalar source must be a normal load.
6993 if (!IsLoad)
6994 return SDValue();
6995
6996 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6997 (Subtarget.hasVLX() && ScalarSize == 64))
6998 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6999
7000 // The integer check is needed for the 64-bit into 128-bit so it doesn't match
7001 // double since there is no vbroadcastsd xmm
7002 if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
7003 if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
7004 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
7005 }
7006
7007 // Unsupported broadcast.
7008 return SDValue();
7009}
7010
7011/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
7012/// underlying vector and index.
7013///
7014/// Modifies \p ExtractedFromVec to the real vector and returns the real
7015/// index.
7016static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
7017 SDValue ExtIdx) {
7018 int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
7019 if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
7020 return Idx;
7021
7022 // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
7023 // lowered this:
7024 // (extract_vector_elt (v8f32 %1), Constant<6>)
7025 // to:
7026 // (extract_vector_elt (vector_shuffle<2,u,u,u>
7027 // (extract_subvector (v8f32 %0), Constant<4>),
7028 // undef)
7029 // Constant<0>)
7030 // In this case the vector is the extract_subvector expression and the index
7031 // is 2, as specified by the shuffle.
7032 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
7033 SDValue ShuffleVec = SVOp->getOperand(0);
7034 MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
7035 assert(ShuffleVecVT.getVectorElementType() ==(static_cast <bool> (ShuffleVecVT.getVectorElementType(
) == ExtractedFromVec.getSimpleValueType().getVectorElementType
()) ? void (0) : __assert_fail ("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7036, __extension__ __PRETTY_FUNCTION__))
7036 ExtractedFromVec.getSimpleValueType().getVectorElementType())(static_cast <bool> (ShuffleVecVT.getVectorElementType(
) == ExtractedFromVec.getSimpleValueType().getVectorElementType
()) ? void (0) : __assert_fail ("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7036, __extension__ __PRETTY_FUNCTION__))
;
7037
7038 int ShuffleIdx = SVOp->getMaskElt(Idx);
7039 if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
7040 ExtractedFromVec = ShuffleVec;
7041 return ShuffleIdx;
7042 }
7043 return Idx;
7044}
7045
7046static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
7047 MVT VT = Op.getSimpleValueType();
7048
7049 // Skip if insert_vec_elt is not supported.
7050 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7051 if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
7052 return SDValue();
7053
7054 SDLoc DL(Op);
7055 unsigned NumElems = Op.getNumOperands();
7056
7057 SDValue VecIn1;
7058 SDValue VecIn2;
7059 SmallVector<unsigned, 4> InsertIndices;
7060 SmallVector<int, 8> Mask(NumElems, -1);
7061
7062 for (unsigned i = 0; i != NumElems; ++i) {
7063 unsigned Opc = Op.getOperand(i).getOpcode();
7064
7065 if (Opc == ISD::UNDEF)
7066 continue;
7067
7068 if (Opc != ISD::EXTRACT_VECTOR_ELT) {
7069 // Quit if more than 1 elements need inserting.
7070 if (InsertIndices.size() > 1)
7071 return SDValue();
7072
7073 InsertIndices.push_back(i);
7074 continue;
7075 }
7076
7077 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
7078 SDValue ExtIdx = Op.getOperand(i).getOperand(1);
7079
7080 // Quit if non-constant index.
7081 if (!isa<ConstantSDNode>(ExtIdx))
7082 return SDValue();
7083 int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
7084
7085 // Quit if extracted from vector of different type.
7086 if (ExtractedFromVec.getValueType() != VT)
7087 return SDValue();
7088
7089 if (!VecIn1.getNode())
7090 VecIn1 = ExtractedFromVec;
7091 else if (VecIn1 != ExtractedFromVec) {
7092 if (!VecIn2.getNode())
7093 VecIn2 = ExtractedFromVec;
7094 else if (VecIn2 != ExtractedFromVec)
7095 // Quit if more than 2 vectors to shuffle
7096 return SDValue();
7097 }
7098
7099 if (ExtractedFromVec == VecIn1)
7100 Mask[i] = Idx;
7101 else if (ExtractedFromVec == VecIn2)
7102 Mask[i] = Idx + NumElems;
7103 }
7104
7105 if (!VecIn1.getNode())
7106 return SDValue();
7107
7108 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
7109 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
7110
7111 for (unsigned Idx : InsertIndices)
7112 NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
7113 DAG.getIntPtrConstant(Idx, DL));
7114
7115 return NV;
7116}
7117
7118static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
7119 assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&(static_cast <bool> (ISD::isBuildVectorOfConstantSDNodes
(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector") ? void (0) : __assert_fail
("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7121, __extension__ __PRETTY_FUNCTION__))
7120 Op.getScalarValueSizeInBits() == 1 &&(static_cast <bool> (ISD::isBuildVectorOfConstantSDNodes
(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector") ? void (0) : __assert_fail
("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7121, __extension__ __PRETTY_FUNCTION__))
7121 "Can not convert non-constant vector")(static_cast <bool> (ISD::isBuildVectorOfConstantSDNodes
(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 &&
"Can not convert non-constant vector") ? void (0) : __assert_fail
("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7121, __extension__ __PRETTY_FUNCTION__))
;
7122 uint64_t Immediate = 0;
7123 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
7124 SDValue In = Op.getOperand(idx);
7125 if (!In.isUndef())
7126 Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
7127 }
7128 SDLoc dl(Op);
7129 MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
7130 return DAG.getConstant(Immediate, dl, VT);
7131}
7132// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
//
// Summary of the paths taken below (Op must have an i1 element type):
//  * all-zeros / all-ones build vectors are returned unchanged;
//  * an all-constant vector is packed into an integer immediate and bitcast
//    back to the vector type (v64i1 on a non-64-bit subtarget is first split
//    into two v32i1 halves that are lowered recursively and concatenated);
//  * a splat of one non-undef value becomes a select between constant 1 and
//    constant 0 of type VT;
//  * otherwise the constant bits form a base vector and every non-constant
//    operand is inserted with INSERT_VECTOR_ELT.
// NOTE(review): despite the wording of the comment above, the body also has
// explicit handling for v64i1 and for results whose bit width differs from the
// immediate's (via v8i1 + EXTRACT_SUBVECTOR) - confirm the full set of
// supported types against the callers.
7133static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
7134 const X86Subtarget &Subtarget) {
7135
7136 MVT VT = Op.getSimpleValueType();
7137 assert((VT.getVectorElementType() == MVT::i1) &&(static_cast <bool> ((VT.getVectorElementType() == MVT::
i1) && "Unexpected type in LowerBUILD_VECTORvXi1!") ?
void (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7138, __extension__ __PRETTY_FUNCTION__))
7138 "Unexpected type in LowerBUILD_VECTORvXi1!")(static_cast <bool> ((VT.getVectorElementType() == MVT::
i1) && "Unexpected type in LowerBUILD_VECTORvXi1!") ?
void (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7138, __extension__ __PRETTY_FUNCTION__))
;
7139
7140 SDLoc dl(Op);
   // All-zeros and all-ones vectors are handed back untouched.
7141 if (ISD::isBuildVectorAllZeros(Op.getNode()))
7142 return Op;
7143
7144 if (ISD::isBuildVectorAllOnes(Op.getNode()))
7145 return Op;
7146
   // Fully constant vector: materialize it through an integer immediate.
7147 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
7148 if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
7149 // Split the pieces.
7150 SDValue Lower =
7151 DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(0, 32));
7152 SDValue Upper =
7153 DAG.getBuildVector(MVT::v32i1, dl, Op.getNode()->ops().slice(32, 32));
7154 // We have to manually lower both halves so getNode doesn't try to
7155 // reassemble the build_vector.
7156 Lower = LowerBUILD_VECTORvXi1(Lower, DAG, Subtarget);
7157 Upper = LowerBUILD_VECTORvXi1(Upper, DAG, Subtarget);
7158 return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lower, Upper);
7159 }
7160 SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
   // Same bit width: a plain bitcast is enough. Otherwise go through v8i1 and
   // take the subvector starting at element 0.
7161 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
7162 return DAG.getBitcast(VT, Imm);
7163 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
7164 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
7165 DAG.getIntPtrConstant(0, dl));
7166 }
7167
7168 // Vector has one or more non-const elements
   // Single pass over the operands that (a) ORs bit 0 of every constant into
   // Immediate at its element position, (b) records the indices of
   // non-constant operands, and (c) tracks whether every non-undef operand
   // equals the first non-undef one (SplatIdx). Undef operands are skipped.
7169 uint64_t Immediate = 0;
7170 SmallVector<unsigned, 16> NonConstIdx;
7171 bool IsSplat = true;
7172 bool HasConstElts = false;
7173 int SplatIdx = -1;
7174 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
7175 SDValue In = Op.getOperand(idx);
7176 if (In.isUndef())
7177 continue;
7178 if (!isa<ConstantSDNode>(In))
7179 NonConstIdx.push_back(idx);
7180 else {
7181 Immediate |= (cast<ConstantSDNode>(In)->getZExtValue() & 0x1) << idx;
7182 HasConstElts = true;
7183 }
7184 if (SplatIdx < 0)
7185 SplatIdx = idx;
7186 else if (In != Op.getOperand(SplatIdx))
7187 IsSplat = false;
7188 }
7189
7190 // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
   // NOTE(review): SplatIdx is used as an operand index here; this relies on
   // at least one non-undef operand having been seen in the loop above -
   // presumably all-undef inputs never reach this point; confirm.
7191 if (IsSplat)
7192 return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
7193 DAG.getConstant(1, dl, VT),
7194 DAG.getConstant(0, dl, VT));
7195
7196 // insert elements one by one
   // Choose the base value the non-constant elements are inserted into:
   // the packed constant bits (as an integer of at least 8 bits), a zero of
   // type VT when constants exist but are all zero, or UNDEF when there are
   // no constant elements at all.
7197 SDValue DstVec;
7198 SDValue Imm;
7199 if (Immediate) {
7200 MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
7201 Imm = DAG.getConstant(Immediate, dl, ImmVT);
7202 }
7203 else if (HasConstElts)
7204 Imm = DAG.getConstant(0, dl, VT);
7205 else
7206 Imm = DAG.getUNDEF(VT);
   // Convert the base value to VT, using the same bitcast / v8i1 +
   // EXTRACT_SUBVECTOR scheme as the all-constant path above.
7207 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
7208 DstVec = DAG.getBitcast(VT, Imm);
7209 else {
7210 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
7211 DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
7212 DAG.getIntPtrConstant(0, dl));
7213 }
7214
   // Chain one INSERT_VECTOR_ELT per non-constant operand onto the base.
7215 for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
7216 unsigned InsertIdx = NonConstIdx[i];
7217 DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
7218 Op.getOperand(InsertIdx),
7219 DAG.getIntPtrConstant(InsertIdx, dl));
7220 }
7221 return DstVec;
7222}
7223
7224/// \brief Return true if \p N implements a horizontal binop and return the
7225/// operands for the horizontal binop into V0 and V1.
7226///
7227/// This is a helper function of LowerToHorizontalOp().
7228/// This function checks that the build_vector \p N in input implements a
7229/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
7230/// operation to match.
7231/// For example, if \p Opcode is equal to ISD::ADD, then this function
7232/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
7233/// is equal to ISD::SUB, then this function checks if this is a horizontal
7234/// arithmetic sub.
7235///
7236/// This function only analyzes elements of \p N whose indices are
7237/// in range [BaseIdx, LastIdx).
///
/// Note that \p V0 and \p V1 are overwritten (first with UNDEF, then possibly
/// with a source vector) before the match is known to succeed, so their
/// contents are only meaningful when the function returns true.
7238static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
7239 SelectionDAG &DAG,
7240 unsigned BaseIdx, unsigned LastIdx,
7241 SDValue &V0, SDValue &V1) {
7242 EVT VT = N->getValueType(0);
7243
7244 assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!")(static_cast <bool> (BaseIdx * 2 <= LastIdx &&
"Invalid Indices in input!") ? void (0) : __assert_fail ("BaseIdx * 2 <= LastIdx && \"Invalid Indices in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7244, __extension__ __PRETTY_FUNCTION__))
;
7245 assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&(static_cast <bool> (VT.isVector() && VT.getVectorNumElements
() >= LastIdx && "Invalid Vector in input!") ? void
(0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7246, __extension__ __PRETTY_FUNCTION__))
7246 "Invalid Vector in input!")(static_cast <bool> (VT.isVector() && VT.getVectorNumElements
() >= LastIdx && "Invalid Vector in input!") ? void
(0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7246, __extension__ __PRETTY_FUNCTION__))
;
7247
   // Only ADD and FADD are treated as commutable, which allows the two
   // extract indices to appear in swapped order further down.
7248 bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
7249 bool CanFold = true;
   // Index the next matched element is expected to extract from; advances by
   // two per element and restarts at BaseIdx at the midpoint of the range.
7250 unsigned ExpectedVExtractIdx = BaseIdx;
7251 unsigned NumElts = LastIdx - BaseIdx;
   // Outputs start as UNDEF and are latched from the first defined element
   // seen in the first half (V0) and in the second half (V1) of the range.
7252 V0 = DAG.getUNDEF(VT);
7253 V1 = DAG.getUNDEF(VT);
7254
7255 // Check if N implements a horizontal binop.
7256 for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
7257 SDValue Op = N->getOperand(i + BaseIdx);
7258
7259 // Skip UNDEFs.
7260 if (Op->isUndef()) {
7261 // Update the expected vector extract index.
7262 if (i * 2 == NumElts)
7263 ExpectedVExtractIdx = BaseIdx;
7264 ExpectedVExtractIdx += 2;
7265 continue;
7266 }
7267
   // A defined element must be a node of the requested opcode with one use.
7268 CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
7269
7270 if (!CanFold)
7271 break;
7272
7273 SDValue Op0 = Op.getOperand(0);
7274 SDValue Op1 = Op.getOperand(1);
7275
7276 // Try to match the following pattern:
7277 // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
7278 CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7279 Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7280 Op0.getOperand(0) == Op1.getOperand(0) &&
7281 isa<ConstantSDNode>(Op0.getOperand(1)) &&
7282 isa<ConstantSDNode>(Op1.getOperand(1)));
7283 if (!CanFold)
7284 break;
7285
7286 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7287 unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
7288
   // First half of the range reads from V0, second half from V1. The source
   // vector must have the same type as the build_vector, otherwise bail out.
7289 if (i * 2 < NumElts) {
7290 if (V0.isUndef()) {
7291 V0 = Op0.getOperand(0);
7292 if (V0.getValueType() != VT)
7293 return false;
7294 }
7295 } else {
7296 if (V1.isUndef()) {
7297 V1 = Op0.getOperand(0);
7298 if (V1.getValueType() != VT)
7299 return false;
7300 }
   // Entering the second half: restart the expected extract index.
7301 if (i * 2 == NumElts)
7302 ExpectedVExtractIdx = BaseIdx;
7303 }
7304
   // The pair must be (Expected[idx], Expected[idx + 1]); for commutable
   // opcodes the two extracts may also appear in the opposite order.
7305 SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
7306 if (I0 == ExpectedVExtractIdx)
7307 CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
7308 else if (IsCommutable && I1 == ExpectedVExtractIdx) {
7309 // Try to match the following dag sequence:
7310 // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
7311 CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
7312 } else
7313 CanFold = false;
7314
7315 ExpectedVExtractIdx += 2;
7316 }
7317
7318 return CanFold;
7319}
7320
7321/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
7322/// a concat_vector.
7323///
7324/// This is a helper function of LowerToHorizontalOp().
7325/// This function expects two 256-bit vectors called V0 and V1.
7326/// At first, each vector is split into two separate 128-bit vectors.
7327/// Then, the resulting 128-bit vectors are used to implement two
7328/// horizontal binary operations.
7329///
7330/// The kind of horizontal binary operation is defined by \p X86Opcode.
7331///
7332/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
7333/// the two new horizontal binop.
7334/// When Mode is set, the first horizontal binop dag node would take as input
7335/// the lower 128-bit of V0 and the upper 128-bit of V0. The second
7336/// horizontal binop dag node would take as input the lower 128-bit of V1
7337/// and the upper 128-bit of V1.
7338/// Example:
7339/// HADD V0_LO, V0_HI
7340/// HADD V1_LO, V1_HI
7341///
7342/// Otherwise, the first horizontal binop dag node takes as input the lower
7343/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
7344/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
7345/// Example:
7346/// HADD V0_LO, V1_LO
7347/// HADD V0_HI, V1_HI
7348///
7349/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
7350/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
7351/// the upper 128-bits of the result.
///
/// \returns a CONCAT_VECTORS node of type VT built from the two 128-bit
/// results (either of which may be UNDEF).
7352static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
7353 const SDLoc &DL, SelectionDAG &DAG,
7354 unsigned X86Opcode, bool Mode,
7355 bool isUndefLO, bool isUndefHI) {
7356 MVT VT = V0.getSimpleValueType();
7357 assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&(static_cast <bool> (VT.is256BitVector() && VT ==
V1.getSimpleValueType() && "Invalid nodes in input!"
) ? void (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7358, __extension__ __PRETTY_FUNCTION__))
7358 "Invalid nodes in input!")(static_cast <bool> (VT.is256BitVector() && VT ==
V1.getSimpleValueType() && "Invalid nodes in input!"
) ? void (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/build/llvm-toolchain-snapshot-6.0~svn321639/lib/Target/X86/X86ISelLowering.cpp"
, 7358, __extension__ __PRETTY_FUNCTION__))
;
7359
   // Split both inputs at element 0 and at element NumElts/2.
7360 unsigned NumElts = VT.getVectorNumElements();
7361 SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
7362 SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
7363 SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
7364 SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
7365 MVT NewVT = V0_LO.getSimpleValueType();
7366
   // Both result halves default to UNDEF and are replaced only when a
   // horizontal op is actually emitted below.
7367 SDValue LO = DAG.getUNDEF(NewVT);
7368 SDValue HI = DAG.getUNDEF(NewVT);
7369
7370 if (Mode) {
7371 // Don't emit a horizontal binop if the result is expected to be UNDEF.
   // In this mode the low result depends only on V0, the high one only on V1.
7372 if (!isUndefLO && !V0->isUndef())
7373 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
7374 if (!isUndefHI && !V1->isUndef())
7375 HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
7376 } else {
7377 // Don't emit a horizontal binop if the result is expected to be UNDEF.
   // Here each result half mixes V0 and V1; it is emitted as long as at
   // least one of its two 128-bit inputs is not UNDEF.
7378 if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
7379 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
7380
7381 if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
7382 HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
7383 }
7384
7385 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
7386}
7387
7388/// Returns true iff \p BV builds a vector with the result equivalent to
7389/// the result of ADDSUB operation.
7390/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
7391/// are written to the parameters \p Opnd0 and \p Opnd1.
7392static bool isAddSub(const BuildVectorSDNode *BV,
7393 const X86Subtarget &Subtarget, SelectionDAG &DAG,
7394 SDValue &Opnd0, SDValue &Opnd1,
7395 unsigned &NumExtracts) {
7396
7397 MVT VT = BV->getSimpleValueType(0);
7398 if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
7399 (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
7400 (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
7401 return false;
7402
7403 unsigned NumElts = VT.getVectorNumElements();
7404 SDValue InVec0 = DAG.getUNDEF(VT);
7405 SDValue InVec1 = DAG.getUNDEF(VT);
7406
7407 NumExtracts = 0;
7408
7409 // Odd-numbered elements in the input build vector are obtained from
7410 // adding two integer/float elements.
7411 // Even-numbered elements in the input build vector are obtained from
7412 // subtracting two integer/float elements.
7413 unsigned ExpectedOpcode = ISD::FSUB;
7414 unsigned NextExpectedOpcode = ISD::FADD;
7415 bool AddFound = false;
7416 bool SubFound = false;
7417
7418 for (unsigned i = 0, e = NumElts; i != e; ++i) {
7419 SDValue Op = BV->getOperand(i);
7420
7421 // Skip 'undef' values.
7422 unsigned Opcode = Op.getOpcode();
7423 if (Opcode == ISD::UNDEF) {
7424 std::swap(ExpectedOpcode, NextExpectedOpcode);
7425 continue;
7426 }
7427
7428 // Early exit if we found an unexpected opcode.
7429 if (Opcode != ExpectedOpcode)
7430 return false;
7431
7432 SDValue Op0 = Op.getOperand(0);
7433 SDValue Op1 = Op.getOperand(1);
7434
7435 // Try to match the following pattern:
7436 // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
7437 // Early exit if we cannot match that sequence.
7438 if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7439 Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7440 !isa<ConstantSDNode>(Op0.getOperand(1)) ||
7441 !isa<ConstantSDNode>(Op1.getOperand(1)) ||
7442 Op0.getOperand(1) != Op1.getOperand(1))
7443 return false;
7444
7445 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7446 if (I0 != i)
7447 return false;
7448
7449 // We found a valid add/sub node. Update the information accordingly.
7450 if (i & 1)
7451 AddFound = true;
7452 else
7453 SubFound = true;
7454
7455 // Update InVec0 and InVec1.
7456 if (InVec0.isUndef()) {
7457 InVec0 = Op0.getOperand(0);
7458 if (InVec0.getSimpleValueType() != VT)
7459 return false;
7460 }
7461 if (InVec1.isUndef()) {
7462 InVec1 = Op1.getOperand(0);
7463 if (InVec1.getSimpleValueType() != VT)
7464 return false;
7465 }
7466
7467 // Make sure that operands in input to each add/sub node always
7468 // come from a same pair of vectors.
7469 if (InVec0 != Op0.getOperand(0)) {
7470 if (ExpectedOpcode == ISD::FSUB)
7471 return false;
7472
7473 // FADD is commutable. Try to commute the operands
7474 // and then test again.
7475 std::swap(Op0, Op1);
7476 if (InVec0 != Op0.getOperand(0))
7477 return false;
7478 }
7479
7480 if (InVec1 != Op1.getOperand(0))
7481 return false;
7482
7483 // Update the pair of expected opcodes.
7484 std::swap(ExpectedOpcode, NextExpectedOpcode);
7485
7486 // Increment the number of extractions done.
7487 ++NumExtracts;
7488 }
7489
7490 // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
7491 if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
7492 return false;
7493
7494 Opnd0 = InVec0;
7495 Opnd1 = InVec1;
7496 return