Bug Summary

File: lib/Target/X86/X86ISelLowering.cpp
Warning: line 26903, column 5
Value stored to 'AllowIntDomain' is never read

Annotated Source Code

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/WinEHFuncInfo.h"
39#include "llvm/IR/CallSite.h"
40#include "llvm/IR/CallingConv.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/Function.h"
44#include "llvm/IR/GlobalAlias.h"
45#include "llvm/IR/GlobalVariable.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/Intrinsics.h"
48#include "llvm/MC/MCAsmInfo.h"
49#include "llvm/MC/MCContext.h"
50#include "llvm/MC/MCExpr.h"
51#include "llvm/MC/MCSymbol.h"
52#include "llvm/Support/CommandLine.h"
53#include "llvm/Support/Debug.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MathExtras.h"
56#include "llvm/Target/TargetLowering.h"
57#include "llvm/Target/TargetOptions.h"
58#include <algorithm>
59#include <bitset>
60#include <cctype>
61#include <numeric>
62using namespace llvm;
63
64#define DEBUG_TYPE "x86-isel"
65
66STATISTIC(NumTailCalls, "Number of tail calls");
// Macro expansion shown by the analyzer for the statement above:
//   static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls",
//                                          "Number of tail calls", {0}, false}
67
// File-local boolean command-line option registered under the name
// "x86-experimental-vector-widening-legalization". It is initialised to false
// and flagged cl::Hidden. Per its description string, turning it on selects an
// experimental vector type legalization that widens rather than promotes.
68static cl::opt<bool> ExperimentalVectorWideningLegalization(
69 "x86-experimental-vector-widening-legalization", cl::init(false),
70 cl::desc("Enable an experimental vector type legalization through widening "
71 "rather than promotion."),
72 cl::Hidden);
73
// File-local integer command-line option registered under the name
// "x86-experimental-pref-loop-alignment". It is initialised to 4 and flagged
// cl::Hidden. Per its description string the value is a bit count: that many
// low-order bits of the loop header PC will be zero.
74static cl::opt<int> ExperimentalPrefLoopAlignment(
75 "x86-experimental-pref-loop-alignment", cl::init(4),
76 cl::desc("Sets the preferable loop alignment for experiments "
77 "(the last x86-experimental-pref-loop-alignment bits"
78 " of the loop header PC will be 0)."),
79 cl::Hidden);
80
81X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
82 const X86Subtarget &STI)
83 : TargetLowering(TM), Subtarget(STI) {
84 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
85 X86ScalarSSEf64 = Subtarget.hasSSE2();
86 X86ScalarSSEf32 = Subtarget.hasSSE1();
87 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
88
89 // Set up the TargetLowering object.
90
91 // X86 is weird. It always uses i8 for shift amounts and setcc results.
92 setBooleanContents(ZeroOrOneBooleanContent);
93 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
94 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
95
96 // For 64-bit, since we have so many registers, use the ILP scheduler.
97 // For 32-bit, use the register pressure specific scheduling.
98 // For Atom, always use ILP scheduling.
99 if (Subtarget.isAtom())
100 setSchedulingPreference(Sched::ILP);
101 else if (Subtarget.is64Bit())
102 setSchedulingPreference(Sched::ILP);
103 else
104 setSchedulingPreference(Sched::RegPressure);
105 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
106 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
107
108 // Bypass expensive divides and use cheaper ones.
109 if (TM.getOptLevel() >= CodeGenOpt::Default) {
110 if (Subtarget.hasSlowDivide32())
111 addBypassSlowDiv(32, 8);
112 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
113 addBypassSlowDiv(64, 32);
114 }
115
116 if (Subtarget.isTargetKnownWindowsMSVC() ||
117 Subtarget.isTargetWindowsItanium()) {
118 // Setup Windows compiler runtime calls.
119 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
120 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
121 setLibcallName(RTLIB::SREM_I64, "_allrem");
122 setLibcallName(RTLIB::UREM_I64, "_aullrem");
123 setLibcallName(RTLIB::MUL_I64, "_allmul");
124 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
125 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
126 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
127 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
128 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
129 }
130
131 if (Subtarget.isTargetDarwin()) {
132 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
133 setUseUnderscoreSetJmp(false);
134 setUseUnderscoreLongJmp(false);
135 } else if (Subtarget.isTargetWindowsGNU()) {
136 // MS runtime is weird: it exports _setjmp, but longjmp!
137 setUseUnderscoreSetJmp(true);
138 setUseUnderscoreLongJmp(false);
139 } else {
140 setUseUnderscoreSetJmp(true);
141 setUseUnderscoreLongJmp(true);
142 }
143
144 // Set up the register classes.
145 addRegisterClass(MVT::i8, &X86::GR8RegClass);
146 addRegisterClass(MVT::i16, &X86::GR16RegClass);
147 addRegisterClass(MVT::i32, &X86::GR32RegClass);
148 if (Subtarget.is64Bit())
149 addRegisterClass(MVT::i64, &X86::GR64RegClass);
150
151 for (MVT VT : MVT::integer_valuetypes())
152 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
153
154 // We don't accept any truncstore of integer registers.
155 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
156 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
157 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
158 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
159 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
160 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
161
162 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
163
164 // SETOEQ and SETUNE require checking two conditions.
165 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
166 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
167 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
168 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
169 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
170 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
171
172 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
173 // operation.
174 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
175 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
176 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
177
178 if (Subtarget.is64Bit()) {
179 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
180 // f32/f64 are legal, f80 is custom.
181 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
182 else
183 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
184 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
185 } else if (!Subtarget.useSoftFloat()) {
186 // We have an algorithm for SSE2->double, and we turn this into a
187 // 64-bit FILD followed by conditional FADD for other targets.
188 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
189 // We have an algorithm for SSE2, and we turn this into a 64-bit
190 // FILD or VCVTUSI2SS/SD for other targets.
191 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
192 }
193
194 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
195 // this operation.
196 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
197 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
198
199 if (!Subtarget.useSoftFloat()) {
200 // SSE has no i16 to fp conversion, only i32.
201 if (X86ScalarSSEf32) {
202 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
203 // f32 and f64 cases are Legal, f80 case is not
204 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
205 } else {
206 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
207 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
208 }
209 } else {
210 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
211 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
212 }
213
214 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have
215 // this operation.
216 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
217 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
218
219 if (!Subtarget.useSoftFloat()) {
220 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
221 // are Legal, f80 is custom lowered.
222 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
223 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
224
225 if (X86ScalarSSEf32) {
226 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
227 // f32 and f64 cases are Legal, f80 case is not
228 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
229 } else {
230 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
231 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
232 }
233 } else {
234 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
235 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
236 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
237 }
238
239 // Handle FP_TO_UINT by promoting the destination to a larger signed
240 // conversion.
241 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
242 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
243 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
244
245 if (Subtarget.is64Bit()) {
246 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
247 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
248 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
249 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
250 } else {
251 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
252 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
253 }
254 } else if (!Subtarget.useSoftFloat()) {
255 // Since AVX is a superset of SSE3, only check for SSE here.
256 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
257 // Expand FP_TO_UINT into a select.
258 // FIXME: We would like to use a Custom expander here eventually to do
259 // the optimal thing for SSE vs. the default expansion in the legalizer.
260 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
261 else
262 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
263 // With SSE3 we can use fisttpll to convert to a signed i64; without
264 // SSE, we're stuck with a fistpll.
265 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
266
267 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
268 }
269
270 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
271 if (!X86ScalarSSEf64) {
272 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
273 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
274 if (Subtarget.is64Bit()) {
275 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
276 // Without SSE, i64->f64 goes through memory.
277 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
278 }
279 } else if (!Subtarget.is64Bit())
280 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
281
282 // Scalar integer divide and remainder are lowered to use operations that
283 // produce two results, to match the available instructions. This exposes
284 // the two-result form to trivial CSE, which is able to combine x/y and x%y
285 // into a single instruction.
286 //
287 // Scalar integer multiply-high is also lowered to use two-result
288 // operations, to match the available instructions. However, plain multiply
289 // (low) operations are left as Legal, as there are single-result
290 // instructions for this in x86. Using the two-result multiply instructions
291 // when both high and low results are needed must be arranged by dagcombine.
292 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
293 setOperationAction(ISD::MULHS, VT, Expand);
294 setOperationAction(ISD::MULHU, VT, Expand);
295 setOperationAction(ISD::SDIV, VT, Expand);
296 setOperationAction(ISD::UDIV, VT, Expand);
297 setOperationAction(ISD::SREM, VT, Expand);
298 setOperationAction(ISD::UREM, VT, Expand);
299 }
300
301 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
302 if (VT == MVT::i64 && !Subtarget.is64Bit())
303 continue;
304 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
305 setOperationAction(ISD::ADDC, VT, Custom);
306 setOperationAction(ISD::ADDE, VT, Custom);
307 setOperationAction(ISD::SUBC, VT, Custom);
308 setOperationAction(ISD::SUBE, VT, Custom);
309 }
310
311 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
312 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
313 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
314 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
315 setOperationAction(ISD::BR_CC, VT, Expand);
316 setOperationAction(ISD::SELECT_CC, VT, Expand);
317 }
318 if (Subtarget.is64Bit())
319 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
320 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
321 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
322 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
323 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
324
325 setOperationAction(ISD::FREM , MVT::f32 , Expand);
326 setOperationAction(ISD::FREM , MVT::f64 , Expand);
327 setOperationAction(ISD::FREM , MVT::f80 , Expand);
328 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
329
330 // Promote the i8 variants and force them on up to i32 which has a shorter
331 // encoding.
332 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
333 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
334 if (!Subtarget.hasBMI()) {
335 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
336 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
337 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
338 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
339 if (Subtarget.is64Bit()) {
340 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
341 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
342 }
343 }
344
345 if (Subtarget.hasLZCNT()) {
346 // When promoting the i8 variants, force them to i32 for a shorter
347 // encoding.
348 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
349 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
350 } else {
351 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
352 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
353 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
354 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
355 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
356 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
357 if (Subtarget.is64Bit()) {
358 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
359 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
360 }
361 }
362
363 // Special handling for half-precision floating point conversions.
364 // If we don't have F16C support, then lower half float conversions
365 // into library calls.
366 if (Subtarget.useSoftFloat() ||
367 (!Subtarget.hasF16C() && !Subtarget.hasAVX512())) {
368 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
369 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
370 }
371
372 // There's never any support for operations beyond MVT::f32.
373 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
374 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
375 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
376 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
377
378 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
379 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
380 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
381 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
382 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
383 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
384
385 if (Subtarget.hasPOPCNT()) {
386 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
387 } else {
388 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
389 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
390 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
391 if (Subtarget.is64Bit())
392 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
393 }
394
395 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
396
397 if (!Subtarget.hasMOVBE())
398 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
399
400 // These should be promoted to a larger select which is supported.
401 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
402 // X86 wants to expand cmov itself.
403 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
404 setOperationAction(ISD::SELECT, VT, Custom);
405 setOperationAction(ISD::SETCC, VT, Custom);
406 }
407 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
408 if (VT == MVT::i64 && !Subtarget.is64Bit())
409 continue;
410 setOperationAction(ISD::SELECT, VT, Custom);
411 setOperationAction(ISD::SETCC, VT, Custom);
412 setOperationAction(ISD::SETCCE, VT, Custom);
413 }
414 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
415 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
416 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
417 // support continuation, user-level threading, etc. As a result, no
418 // other SjLj exception interfaces are implemented and please don't build
419 // your own exception handling based on them.
420 // LLVM/Clang supports zero-cost DWARF exception handling.
421 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
422 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
423 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
424 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
425 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
426
427 // Darwin ABI issue.
428 for (auto VT : { MVT::i32, MVT::i64 }) {
429 if (VT == MVT::i64 && !Subtarget.is64Bit())
430 continue;
431 setOperationAction(ISD::ConstantPool , VT, Custom);
432 setOperationAction(ISD::JumpTable , VT, Custom);
433 setOperationAction(ISD::GlobalAddress , VT, Custom);
434 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
435 setOperationAction(ISD::ExternalSymbol , VT, Custom);
436 setOperationAction(ISD::BlockAddress , VT, Custom);
437 }
438 // 64-bit add, sub, shl, sra, srl (iff 32-bit x86)
439 for (auto VT : { MVT::i32, MVT::i64 }) {
440 if (VT == MVT::i64 && !Subtarget.is64Bit())
441 continue;
442 setOperationAction(ISD::SHL_PARTS, VT, Custom);
443 setOperationAction(ISD::SRA_PARTS, VT, Custom);
444 setOperationAction(ISD::SRL_PARTS, VT, Custom);
445 }
446
447 if (Subtarget.hasSSE1())
448 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
449
450 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
451
452 // Expand certain atomics
453 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
454 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
455 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
456 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
457 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
458 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
459 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
460 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
461 }
462
463 if (Subtarget.hasCmpxchg16b()) {
464 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
465 }
466
467 // FIXME - use subtarget debug flags
468 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
469 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
470 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
471 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
472 }
473
474 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
475 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
476
477 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
478 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
479
480 setOperationAction(ISD::TRAP, MVT::Other, Legal);
481 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
482
483 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
484 setOperationAction(ISD::VASTART , MVT::Other, Custom);
485 setOperationAction(ISD::VAEND , MVT::Other, Expand);
486 bool Is64Bit = Subtarget.is64Bit();
487 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
488 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
489
490 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
491 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
492
493 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
494
495 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
496 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
497 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
498
499 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
500 // f32 and f64 use SSE.
501 // Set up the FP register classes.
502 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
503 : &X86::FR32RegClass);
504 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
505 : &X86::FR64RegClass);
506
507 for (auto VT : { MVT::f32, MVT::f64 }) {
508 // Use ANDPD to simulate FABS.
509 setOperationAction(ISD::FABS, VT, Custom);
510
511 // Use XORP to simulate FNEG.
512 setOperationAction(ISD::FNEG, VT, Custom);
513
514 // Use ANDPD and ORPD to simulate FCOPYSIGN.
515 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
516
517 // We don't support sin/cos/fmod
518 setOperationAction(ISD::FSIN , VT, Expand);
519 setOperationAction(ISD::FCOS , VT, Expand);
520 setOperationAction(ISD::FSINCOS, VT, Expand);
521 }
522
523 // Lower this to MOVMSK plus an AND.
524 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
525 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
526
527 // Expand FP immediates into loads from the stack, except for the special
528 // cases we handle.
529 addLegalFPImmediate(APFloat(+0.0)); // xorpd
530 addLegalFPImmediate(APFloat(+0.0f)); // xorps
531 } else if (UseX87 && X86ScalarSSEf32) {
532 // Use SSE for f32, x87 for f64.
533 // Set up the FP register classes.
534 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
535 : &X86::FR32RegClass);
536 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
537
538 // Use ANDPS to simulate FABS.
539 setOperationAction(ISD::FABS , MVT::f32, Custom);
540
541 // Use XORP to simulate FNEG.
542 setOperationAction(ISD::FNEG , MVT::f32, Custom);
543
544 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
545
546 // Use ANDPS and ORPS to simulate FCOPYSIGN.
547 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
548 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
549
550 // We don't support sin/cos/fmod
551 setOperationAction(ISD::FSIN , MVT::f32, Expand);
552 setOperationAction(ISD::FCOS , MVT::f32, Expand);
553 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
554
555 // Special cases we handle for FP constants.
556 addLegalFPImmediate(APFloat(+0.0f)); // xorps
557 addLegalFPImmediate(APFloat(+0.0)); // FLD0
558 addLegalFPImmediate(APFloat(+1.0)); // FLD1
559 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
560 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
561
562 if (!TM.Options.UnsafeFPMath) {
563 setOperationAction(ISD::FSIN , MVT::f64, Expand);
564 setOperationAction(ISD::FCOS , MVT::f64, Expand);
565 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
566 }
567 } else if (UseX87) {
568 // f32 and f64 in x87.
569 // Set up the FP register classes.
570 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
571 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
572
573 for (auto VT : { MVT::f32, MVT::f64 }) {
574 setOperationAction(ISD::UNDEF, VT, Expand);
575 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
576
577 if (!TM.Options.UnsafeFPMath) {
578 setOperationAction(ISD::FSIN , VT, Expand);
579 setOperationAction(ISD::FCOS , VT, Expand);
580 setOperationAction(ISD::FSINCOS, VT, Expand);
581 }
582 }
583 addLegalFPImmediate(APFloat(+0.0)); // FLD0
584 addLegalFPImmediate(APFloat(+1.0)); // FLD1
585 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
586 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
587 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
588 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
589 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
590 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
591 }
592
593 // We don't support FMA.
594 setOperationAction(ISD::FMA, MVT::f64, Expand);
595 setOperationAction(ISD::FMA, MVT::f32, Expand);
596
597 // Long double always uses X87, except f128 in MMX.
598 if (UseX87) {
599 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
600 addRegisterClass(MVT::f128, &X86::FR128RegClass);
601 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
602 setOperationAction(ISD::FABS , MVT::f128, Custom);
603 setOperationAction(ISD::FNEG , MVT::f128, Custom);
604 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
605 }
606
607 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
608 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
609 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
610 {
611 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
612 addLegalFPImmediate(TmpFlt); // FLD0
613 TmpFlt.changeSign();
614 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
615
616 bool ignored;
617 APFloat TmpFlt2(+1.0);
618 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
619 &ignored);
620 addLegalFPImmediate(TmpFlt2); // FLD1
621 TmpFlt2.changeSign();
622 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
623 }
624
625 if (!TM.Options.UnsafeFPMath) {
626 setOperationAction(ISD::FSIN , MVT::f80, Expand);
627 setOperationAction(ISD::FCOS , MVT::f80, Expand);
628 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
629 }
630
631 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
632 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
633 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
634 setOperationAction(ISD::FRINT, MVT::f80, Expand);
635 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
636 setOperationAction(ISD::FMA, MVT::f80, Expand);
637 }
638
639 // Always use a library call for pow.
640 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
641 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
642 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
643
644 setOperationAction(ISD::FLOG, MVT::f80, Expand);
645 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
646 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
647 setOperationAction(ISD::FEXP, MVT::f80, Expand);
648 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
649 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
650 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
651
652 // Some FP actions are always expanded for vector types.
653 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
654 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
655 setOperationAction(ISD::FSIN, VT, Expand);
656 setOperationAction(ISD::FSINCOS, VT, Expand);
657 setOperationAction(ISD::FCOS, VT, Expand);
658 setOperationAction(ISD::FREM, VT, Expand);
659 setOperationAction(ISD::FPOWI, VT, Expand);
660 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
661 setOperationAction(ISD::FPOW, VT, Expand);
662 setOperationAction(ISD::FLOG, VT, Expand);
663 setOperationAction(ISD::FLOG2, VT, Expand);
664 setOperationAction(ISD::FLOG10, VT, Expand);
665 setOperationAction(ISD::FEXP, VT, Expand);
666 setOperationAction(ISD::FEXP2, VT, Expand);
667 }
668
669 // First set operation action for all vector types to either promote
670 // (for widening) or expand (for scalarization). Then we will selectively
671 // turn on ones that can be effectively codegen'd.
672 for (MVT VT : MVT::vector_valuetypes()) {
673 setOperationAction(ISD::SDIV, VT, Expand);
674 setOperationAction(ISD::UDIV, VT, Expand);
675 setOperationAction(ISD::SREM, VT, Expand);
676 setOperationAction(ISD::UREM, VT, Expand);
677 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
678 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
679 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
680 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
681 setOperationAction(ISD::FMA, VT, Expand);
682 setOperationAction(ISD::FFLOOR, VT, Expand);
683 setOperationAction(ISD::FCEIL, VT, Expand);
684 setOperationAction(ISD::FTRUNC, VT, Expand);
685 setOperationAction(ISD::FRINT, VT, Expand);
686 setOperationAction(ISD::FNEARBYINT, VT, Expand);
687 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
688 setOperationAction(ISD::MULHS, VT, Expand);
689 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
690 setOperationAction(ISD::MULHU, VT, Expand);
691 setOperationAction(ISD::SDIVREM, VT, Expand);
692 setOperationAction(ISD::UDIVREM, VT, Expand);
693 setOperationAction(ISD::CTPOP, VT, Expand);
694 setOperationAction(ISD::CTTZ, VT, Expand);
695 setOperationAction(ISD::CTLZ, VT, Expand);
696 setOperationAction(ISD::ROTL, VT, Expand);
697 setOperationAction(ISD::ROTR, VT, Expand);
698 setOperationAction(ISD::BSWAP, VT, Expand);
699 setOperationAction(ISD::SETCC, VT, Expand);
700 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
701 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
702 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
703 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
704 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
705 setOperationAction(ISD::TRUNCATE, VT, Expand);
706 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
707 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
708 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
709 setOperationAction(ISD::SELECT_CC, VT, Expand);
710 for (MVT InnerVT : MVT::vector_valuetypes()) {
711 setTruncStoreAction(InnerVT, VT, Expand);
712
713 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
714 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
715
716 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
717 // types, we have to deal with them whether we ask for Expansion or not.
718 // Setting Expand causes its own optimisation problems though, so leave
719 // them legal.
720 if (VT.getVectorElementType() == MVT::i1)
721 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
722
723 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
724 // split/scalarized right now.
725 if (VT.getVectorElementType() == MVT::f16)
726 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
727 }
728 }
729
730 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
731 // with -msoft-float, disable use of MMX as well.
732 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
733 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
734 // No operations on x86mmx supported, everything uses intrinsics.
735 }
736
737 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
738 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
739 : &X86::VR128RegClass);
740
741 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
742 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
743 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
744 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
745 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
746 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
747 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
748 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
749 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
750 }
751
752 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
753 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
754 : &X86::VR128RegClass);
755
756 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
757 // registers cannot be used even for integer operations.
758 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
759 : &X86::VR128RegClass);
760 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
761 : &X86::VR128RegClass);
762 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
763 : &X86::VR128RegClass);
764 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
765 : &X86::VR128RegClass);
766
767 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
768 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
769 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
770 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
771 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
772 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
773 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
774 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
775 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
776 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
777 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
778 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
779 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
780
781 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
782 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
783 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
784 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
785
786 setOperationAction(ISD::SETCC, MVT::v2i64, Custom);
787 setOperationAction(ISD::SETCC, MVT::v16i8, Custom);
788 setOperationAction(ISD::SETCC, MVT::v8i16, Custom);
789 setOperationAction(ISD::SETCC, MVT::v4i32, Custom);
790
791 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Custom);
792 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Custom);
793 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);
794 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
795 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
796 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
797
798 setOperationAction(ISD::CTPOP, MVT::v16i8, Custom);
799 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
800 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
801 setOperationAction(ISD::CTPOP, MVT::v2i64, Custom);
802
803 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
804 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
805 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
806 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
807
808 // Custom lower build_vector, vector_shuffle, and extract_vector_elt.
809 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
810 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
811 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
812 setOperationAction(ISD::VSELECT, VT, Custom);
813 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
814 }
815
816 // We support custom legalizing of sext and anyext loads for specific
817 // memory vector types which we can load as a scalar (or sequence of
818 // scalars) and extend in-register to a legal 128-bit vector type. For sext
819 // loads these must work with a single scalar load.
820 for (MVT VT : MVT::integer_vector_valuetypes()) {
821 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
822 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
823 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
824 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
825 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
826 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
827 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
828 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
829 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
830 }
831
832 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
833 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
834 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
835 setOperationAction(ISD::VSELECT, VT, Custom);
836
837 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
838 continue;
839
840 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
841 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
842 }
843
844 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
845 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
846 setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
847 setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
848 setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
849 setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
850 setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
851 }
852
853 // Custom lower v2i64 and v2f64 selects.
854 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
855 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
856
857 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
858 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
859
860 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
861 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
862
863 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
864 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
865 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
866
867 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
868 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
869
870 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
871 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
872
873 for (MVT VT : MVT::fp_vector_valuetypes())
874 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
875
876 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
877 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
878 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
879
880 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
881 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
882 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
883
884 for (auto VT : { MVT::v8i16, MVT::v16i8 }) {
885 setOperationAction(ISD::SRL, VT, Custom);
886 setOperationAction(ISD::SHL, VT, Custom);
887 setOperationAction(ISD::SRA, VT, Custom);
888 }
889
890 // In the customized shift lowering, the legal cases in AVX2 will be
891 // recognized.
892 for (auto VT : { MVT::v4i32, MVT::v2i64 }) {
893 setOperationAction(ISD::SRL, VT, Custom);
894 setOperationAction(ISD::SHL, VT, Custom);
895 setOperationAction(ISD::SRA, VT, Custom);
896 }
897 }
898
899 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
900 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
901 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
902 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
903 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
904 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
905 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
906 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
907 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
908 }
909
910 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
911 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
912 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
913 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
914 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
915 setOperationAction(ISD::FRINT, RoundedTy, Legal);
916 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
917 }
918
919 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
920 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
921 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
922 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
923 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
924 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
925 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
926 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
927
928 // FIXME: Do we need to handle scalar-to-vector here?
929 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
930
931 // We directly match byte blends in the backend as they match the VSELECT
932 // condition form.
933 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
934
935 // SSE41 brings specific instructions for doing vector sign extend even in
936 // cases where we don't have SRA.
937 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
938 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
939 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
940
941 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v2i64, Legal);
942 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v4i32, Legal);
943 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v8i16, Legal);
944
945 for (MVT VT : MVT::integer_vector_valuetypes()) {
946 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
947 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
948 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
949 }
950
951 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
952 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
953 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
954 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
955 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
956 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
957 setLoadExtAction(ISD::SEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
958
959 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i16, MVT::v8i8, Legal);
960 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
961 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
962 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
963 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
964 setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
965
966 // i8 vectors are custom because the source register and
967 // source memory operand types are not the same width.
968 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
969 }
970
971 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
972 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
973 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
974 setOperationAction(ISD::ROTL, VT, Custom);
975
976 // XOP can efficiently perform BITREVERSE with VPPERM.
977 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
978 setOperationAction(ISD::BITREVERSE, VT, Custom);
979
980 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
981 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
982 setOperationAction(ISD::BITREVERSE, VT, Custom);
983 }
984
985 if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
986 bool HasInt256 = Subtarget.hasInt256();
987
988 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
989 : &X86::VR256RegClass);
990 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
991 : &X86::VR256RegClass);
992 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
993 : &X86::VR256RegClass);
994 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
995 : &X86::VR256RegClass);
996 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
997 : &X86::VR256RegClass);
998 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
999 : &X86::VR256RegClass);
1000
1001 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1002 setOperationAction(ISD::FFLOOR, VT, Legal);
1003 setOperationAction(ISD::FCEIL, VT, Legal);
1004 setOperationAction(ISD::FTRUNC, VT, Legal);
1005 setOperationAction(ISD::FRINT, VT, Legal);
1006 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1007 setOperationAction(ISD::FNEG, VT, Custom);
1008 setOperationAction(ISD::FABS, VT, Custom);
1009 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1010 }
1011
1012 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1013 // even though v8i16 is a legal type.
1014 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1015 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1016 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1017
1018 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1019 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1020 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1021
1022 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1023 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1024
1025 for (MVT VT : MVT::fp_vector_valuetypes())
1026 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1027
1028 for (auto VT : { MVT::v32i8, MVT::v16i16 }) {
1029 setOperationAction(ISD::SRL, VT, Custom);
1030 setOperationAction(ISD::SHL, VT, Custom);
1031 setOperationAction(ISD::SRA, VT, Custom);
1032 }
1033
1034 setOperationAction(ISD::SETCC, MVT::v32i8, Custom);
1035 setOperationAction(ISD::SETCC, MVT::v16i16, Custom);
1036 setOperationAction(ISD::SETCC, MVT::v8i32, Custom);
1037 setOperationAction(ISD::SETCC, MVT::v4i64, Custom);
1038
1039 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1040 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1041 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1042
1043 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1044 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
1045 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1046 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
1047 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
1048 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i16, Custom);
1049 setOperationAction(ISD::ANY_EXTEND, MVT::v4i64, Custom);
1050 setOperationAction(ISD::ANY_EXTEND, MVT::v8i32, Custom);
1051 setOperationAction(ISD::ANY_EXTEND, MVT::v16i16, Custom);
1052 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1053 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1054 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1055 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1056
1057 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1058 setOperationAction(ISD::CTPOP, VT, Custom);
1059 setOperationAction(ISD::CTTZ, VT, Custom);
1060 setOperationAction(ISD::CTLZ, VT, Custom);
1061 }
1062
1063 if (Subtarget.hasAnyFMA()) {
1064 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1065 MVT::v2f64, MVT::v4f64 })
1066 setOperationAction(ISD::FMA, VT, Legal);
1067 }
1068
1069 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1070 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1071 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1072 }
1073
1074 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1075 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1076 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1077 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1078
1079 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1080 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1081
1082 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1083 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1084 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1085 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1086
1087 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1088 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1089 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1090 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1091 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1092 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1093 }
1094
1095 if (HasInt256) {
1096 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1097 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
1098 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
1099
1100 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1101 // when we have a 256bit-wide blend with immediate.
1102 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1103
1104 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1105 setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1106 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1107 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1108 setLoadExtAction(ISD::SEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1109 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1110 setLoadExtAction(ISD::SEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1111
1112 setLoadExtAction(ISD::ZEXTLOAD, MVT::v16i16, MVT::v16i8, Legal);
1113 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1114 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1115 setLoadExtAction(ISD::ZEXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1116 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1117 setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1118 }
1119
1120 // In the customized shift lowering, the legal cases in AVX2 will be
1121 // recognized.
1122 for (auto VT : { MVT::v8i32, MVT::v4i64 }) {
1123 setOperationAction(ISD::SRL, VT, Custom);
1124 setOperationAction(ISD::SHL, VT, Custom);
1125 setOperationAction(ISD::SRA, VT, Custom);
1126 }
1127
1128 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1129 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1130 setOperationAction(ISD::MLOAD, VT, Legal);
1131 setOperationAction(ISD::MSTORE, VT, Legal);
1132 }
1133
1134 // Extract subvector is special because the value type
1135 // (result) is 128-bit but the source is 256-bit wide.
1136 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1137 MVT::v4f32, MVT::v2f64 }) {
1138 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1139 }
1140
1141 // Custom lower several nodes for 256-bit types.
1142 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1143 MVT::v8f32, MVT::v4f64 }) {
1144 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1145 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1146 setOperationAction(ISD::VSELECT, VT, Custom);
1147 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1148 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1149 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1150 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1151 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1152 }
1153
1154 if (HasInt256)
1155 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1156
1157 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1158 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1159 setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
1160 setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
1161 setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
1162 setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1163 setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
1164 }
1165 }
1166
1167 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1168 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1169 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1170 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1171 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1172
1173 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1174 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1175 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1176
1177 for (MVT VT : MVT::fp_vector_valuetypes())
1178 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1179
1180 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
1181 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1182 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1183 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1184 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1185 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1186 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1187 }
1188 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1189 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1190 setOperationAction(ISD::SETCCE, MVT::i1, Custom);
1191 setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
1192 setOperationAction(ISD::XOR, MVT::i1, Legal);
1193 setOperationAction(ISD::OR, MVT::i1, Legal);
1194 setOperationAction(ISD::AND, MVT::i1, Legal);
1195 setOperationAction(ISD::SUB, MVT::i1, Custom);
1196 setOperationAction(ISD::ADD, MVT::i1, Custom);
1197 setOperationAction(ISD::MUL, MVT::i1, Custom);
1198
1199 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
1200 MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
1201 MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
1202 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
1203 setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
1204 setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
1205 setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
1206 setTruncStoreAction(VT, MaskVT, Custom);
1207 }
1208
1209 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1210 setOperationAction(ISD::FNEG, VT, Custom);
1211 setOperationAction(ISD::FABS, VT, Custom);
1212 setOperationAction(ISD::FMA, VT, Legal);
1213 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1214 }
1215
1216 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1217 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1218 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1219 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1220 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1221 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1222 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1223 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1224 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1225 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1226 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1227 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1228 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1229 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1230 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1231 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1232 setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
1233 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1234 setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
1235 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1236 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1237 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
1238 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
1239 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1240 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1241
1242 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1243 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1244 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1245 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1246 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1247 if (Subtarget.hasVLX()){
1248 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1249 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1250 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1251 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1252 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1253
1254 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1255 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1256 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1257 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1258 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1259 } else {
1260 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1261 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1262 setOperationAction(ISD::MLOAD, VT, Custom);
1263 setOperationAction(ISD::MSTORE, VT, Custom);
1264 }
1265 }
1266 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1267 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1268 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1269 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i1, Custom);
1270 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i1, Custom);
1271 setOperationAction(ISD::VSELECT, MVT::v8i1, Expand);
1272 setOperationAction(ISD::VSELECT, MVT::v16i1, Expand);
1273 if (Subtarget.hasDQI()) {
1274 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1275 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
1276 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
1277 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1278 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
1279 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
1280 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1281 setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
1282 setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
1283 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1284 setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
1285 setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
1286
1287 if (Subtarget.hasVLX()) {
1288 // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
1289 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1290 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1291 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1292 }
1293 }
1294 if (Subtarget.hasVLX()) {
1295 setOperationAction(ISD::ABS, MVT::v4i64, Legal);
1296 setOperationAction(ISD::ABS, MVT::v2i64, Legal);
1297 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1298 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1299 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1300 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1301 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1302 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1303 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1304 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
1305 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
1306 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1307 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1308
1309 // FIXME: These commands are available on SSE/AVX2; add relevant patterns.
1310 setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1311 setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1312 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1313 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1314 setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1315 setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1316 setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1317 setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1318 setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1319 setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1320 }
1321
1322 setOperationAction(ISD::TRUNCATE, MVT::v8i1, Custom);
1323 setOperationAction(ISD::TRUNCATE, MVT::v16i1, Custom);
1324 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1325 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1326 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1327 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1328 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1329 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1330 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1331 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1332 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1333 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1334
1335 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1336 setOperationAction(ISD::FFLOOR, VT, Legal);
1337 setOperationAction(ISD::FCEIL, VT, Legal);
1338 setOperationAction(ISD::FTRUNC, VT, Legal);
1339 setOperationAction(ISD::FRINT, VT, Legal);
1340 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1341 }
1342
1343 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1344 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1345
1346 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1347 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1348 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1349
1350 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1351 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1352 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1353 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1354 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1355
1356 setOperationAction(ISD::SETCC, MVT::v16i1, Custom);
1357 setOperationAction(ISD::SETCC, MVT::v8i1, Custom);
1358
1359 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1360
1361 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom);
1362 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom);
1363 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1364 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom);
1365 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom);
1366 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom);
1367 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i1, Custom);
1368 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1369 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1370 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1371 setOperationAction(ISD::SELECT, MVT::v16i1, Custom);
1372 setOperationAction(ISD::SELECT, MVT::v8i1, Custom);
1373
1374 setOperationAction(ISD::SMAX, MVT::v16i32, Legal);
1375 setOperationAction(ISD::SMAX, MVT::v8i64, Legal);
1376 setOperationAction(ISD::UMAX, MVT::v16i32, Legal);
1377 setOperationAction(ISD::UMAX, MVT::v8i64, Legal);
1378 setOperationAction(ISD::SMIN, MVT::v16i32, Legal);
1379 setOperationAction(ISD::SMIN, MVT::v8i64, Legal);
1380 setOperationAction(ISD::UMIN, MVT::v16i32, Legal);
1381 setOperationAction(ISD::UMIN, MVT::v8i64, Legal);
1382
1383 setOperationAction(ISD::ADD, MVT::v8i1, Custom);
1384 setOperationAction(ISD::ADD, MVT::v16i1, Custom);
1385 setOperationAction(ISD::SUB, MVT::v8i1, Custom);
1386 setOperationAction(ISD::SUB, MVT::v16i1, Custom);
1387 setOperationAction(ISD::MUL, MVT::v8i1, Custom);
1388 setOperationAction(ISD::MUL, MVT::v16i1, Custom);
1389
1390 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1391
1392 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1393 setOperationAction(ISD::ABS, VT, Legal);
1394 setOperationAction(ISD::SRL, VT, Custom);
1395 setOperationAction(ISD::SHL, VT, Custom);
1396 setOperationAction(ISD::SRA, VT, Custom);
1397 setOperationAction(ISD::CTPOP, VT, Custom);
1398 setOperationAction(ISD::CTTZ, VT, Custom);
1399 }
1400
1401 // Need to promote to 64-bit even though we have 32-bit masked instructions
1402 // because the IR optimizers rearrange bitcasts around logic ops leaving
1403 // too many variations to handle if we don't promote them.
1404 setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
1405 setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
1406 setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
1407
1408 if (Subtarget.hasCDI()) {
1409 setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
1410 setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
1411
1412 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1413 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1414 setOperationAction(ISD::CTLZ, MVT::v16i16, Custom);
1415 setOperationAction(ISD::CTLZ, MVT::v32i8, Custom);
1416
1417 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i64, Custom);
1418 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i32, Custom);
1419
1420 if (Subtarget.hasVLX()) {
1421 setOperationAction(ISD::CTLZ, MVT::v4i64, Legal);
1422 setOperationAction(ISD::CTLZ, MVT::v8i32, Legal);
1423 setOperationAction(ISD::CTLZ, MVT::v2i64, Legal);
1424 setOperationAction(ISD::CTLZ, MVT::v4i32, Legal);
1425 } else {
1426 setOperationAction(ISD::CTLZ, MVT::v4i64, Custom);
1427 setOperationAction(ISD::CTLZ, MVT::v8i32, Custom);
1428 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1429 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1430 }
1431
1432 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i64, Custom);
1433 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i32, Custom);
1434 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
1435 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
1436 } // Subtarget.hasCDI()
1437
1438 if (Subtarget.hasDQI()) {
1439 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1440 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1441 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1442 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1443 }
1444
1445 // Custom lower several nodes.
1446 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1447 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1448 setOperationAction(ISD::MGATHER, VT, Custom);
1449 setOperationAction(ISD::MSCATTER, VT, Custom);
1450 }
1451 // Extract subvector is special because the value type
1452 // (result) is 256-bit but the source is 512-bit wide.
1453 // 128-bit was made Custom under AVX1.
1454 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1455 MVT::v8f32, MVT::v4f64 })
1456 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1457 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
1458 MVT::v16i1, MVT::v32i1, MVT::v64i1 })
1459 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1460
1461 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1462 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1463 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1464 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1465 setOperationAction(ISD::VSELECT, VT, Legal);
1466 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1467 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1468 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1469 setOperationAction(ISD::MLOAD, VT, Legal);
1470 setOperationAction(ISD::MSTORE, VT, Legal);
1471 setOperationAction(ISD::MGATHER, VT, Legal);
1472 setOperationAction(ISD::MSCATTER, VT, Custom);
1473 }
1474 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1475 setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1476 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
1477 }
1478 }// has AVX-512
1479
1480 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1481 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1482 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1483
1484 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1485 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1486
1487 setOperationAction(ISD::ADD, MVT::v32i1, Custom);
1488 setOperationAction(ISD::ADD, MVT::v64i1, Custom);
1489 setOperationAction(ISD::SUB, MVT::v32i1, Custom);
1490 setOperationAction(ISD::SUB, MVT::v64i1, Custom);
1491 setOperationAction(ISD::MUL, MVT::v32i1, Custom);
1492 setOperationAction(ISD::MUL, MVT::v64i1, Custom);
1493
1494 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1495 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1496 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1497 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1498 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1499 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1500 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1501 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1502 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1503 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1504 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1505 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1506 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1507 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1508 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1509 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1510 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
1511 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
1512 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1513 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1514 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1515 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1516 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1517 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1518 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1519 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1520 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1521 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1522 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1523 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1524 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1525 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1526 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1527 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1528 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1529 setOperationAction(ISD::VSELECT, MVT::v32i16, Legal);
1530 setOperationAction(ISD::VSELECT, MVT::v64i8, Legal);
1531 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1532 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1533 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1534 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1535 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1536 setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom);
1537 setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom);
1538 setOperationAction(ISD::VSELECT, MVT::v32i1, Expand);
1539 setOperationAction(ISD::VSELECT, MVT::v64i1, Expand);
1540 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1541
1542 setOperationAction(ISD::SMAX, MVT::v64i8, Legal);
1543 setOperationAction(ISD::SMAX, MVT::v32i16, Legal);
1544 setOperationAction(ISD::UMAX, MVT::v64i8, Legal);
1545 setOperationAction(ISD::UMAX, MVT::v32i16, Legal);
1546 setOperationAction(ISD::SMIN, MVT::v64i8, Legal);
1547 setOperationAction(ISD::SMIN, MVT::v32i16, Legal);
1548 setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
1549 setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
1550
1551 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1552
1553 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1554 if (Subtarget.hasVLX()) {
1555 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1556 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1557 }
1558
1559 LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
1560 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1561 setOperationAction(ISD::MLOAD, VT, Action);
1562 setOperationAction(ISD::MSTORE, VT, Action);
1563 }
1564
1565 if (Subtarget.hasCDI()) {
1566 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1567 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1568 }
1569
1570 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1571 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1572 setOperationAction(ISD::VSELECT, VT, Legal);
1573 setOperationAction(ISD::ABS, VT, Legal);
1574 setOperationAction(ISD::SRL, VT, Custom);
1575 setOperationAction(ISD::SHL, VT, Custom);
1576 setOperationAction(ISD::SRA, VT, Custom);
1577 setOperationAction(ISD::MLOAD, VT, Legal);
1578 setOperationAction(ISD::MSTORE, VT, Legal);
1579 setOperationAction(ISD::CTPOP, VT, Custom);
1580 setOperationAction(ISD::CTTZ, VT, Custom);
1581
1582 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
1583 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
1584 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
1585 }
1586
1587 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
1588 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1589 if (Subtarget.hasVLX()) {
1590 // FIXME. This commands are available on SSE/AVX2, add relevant patterns.
1591 setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
1592 setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal);
1593 }
1594 }
1595 }
1596
1597 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1598 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1599 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1600
1601 for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
1602 setOperationAction(ISD::ADD, VT, Custom);
1603 setOperationAction(ISD::SUB, VT, Custom);
1604 setOperationAction(ISD::MUL, VT, Custom);
1605 setOperationAction(ISD::VSELECT, VT, Expand);
1606
1607 setOperationAction(ISD::TRUNCATE, VT, Custom);
1608 setOperationAction(ISD::SETCC, VT, Custom);
1609 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1610 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1611 setOperationAction(ISD::SELECT, VT, Custom);
1612 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1613 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1614 }
1615
1616 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1617 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1618 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1619 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1620
1621 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1622 setOperationAction(ISD::SMAX, VT, Legal);
1623 setOperationAction(ISD::UMAX, VT, Legal);
1624 setOperationAction(ISD::SMIN, VT, Legal);
1625 setOperationAction(ISD::UMIN, VT, Legal);
1626 }
1627 }
1628
1629 // We want to custom lower some of our intrinsics.
1630 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1631 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1632 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1633 if (!Subtarget.is64Bit()) {
1634 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1635 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1636 }
1637
1638 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1639 // handle type legalization for these operations here.
1640 //
1641 // FIXME: We really should do custom legalization for addition and
1642 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1643 // than generic legalization for 64-bit multiplication-with-overflow, though.
1644 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1645 if (VT == MVT::i64 && !Subtarget.is64Bit())
1646 continue;
1647 // Add/Sub/Mul with overflow operations are custom lowered.
1648 setOperationAction(ISD::SADDO, VT, Custom);
1649 setOperationAction(ISD::UADDO, VT, Custom);
1650 setOperationAction(ISD::SSUBO, VT, Custom);
1651 setOperationAction(ISD::USUBO, VT, Custom);
1652 setOperationAction(ISD::SMULO, VT, Custom);
1653 setOperationAction(ISD::UMULO, VT, Custom);
1654 }
1655
1656 if (!Subtarget.is64Bit()) {
1657 // These libcalls are not available in 32-bit.
1658 setLibcallName(RTLIB::SHL_I128, nullptr);
1659 setLibcallName(RTLIB::SRL_I128, nullptr);
1660 setLibcallName(RTLIB::SRA_I128, nullptr);
1661 }
1662
1663 // Combine sin / cos into one node or libcall if possible.
1664 if (Subtarget.hasSinCos()) {
1665 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1666 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1667 if (Subtarget.isTargetDarwin()) {
1668 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1669 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1670 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1671 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1672 }
1673 }
1674
1675 if (Subtarget.isTargetWin64()) {
1676 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1677 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1678 setOperationAction(ISD::SREM, MVT::i128, Custom);
1679 setOperationAction(ISD::UREM, MVT::i128, Custom);
1680 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1681 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1682 }
1683
1684 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1685 // is. We should promote the value to 64-bits to solve this.
1686 // This is what the CRT headers do - `fmodf` is an inline header
1687 // function casting to f64 and calling `fmod`.
1688 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1689 Subtarget.isTargetWindowsItanium()))
1690 for (ISD::NodeType Op :
1691 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1692 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1693 if (isOperationExpand(Op, MVT::f32))
1694 setOperationAction(Op, MVT::f32, Promote);
1695
1696 // We have target-specific dag combine patterns for the following nodes:
1697 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1698 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1699 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1700 setTargetDAGCombine(ISD::BITCAST);
1701 setTargetDAGCombine(ISD::VSELECT);
1702 setTargetDAGCombine(ISD::SELECT);
1703 setTargetDAGCombine(ISD::SHL);
1704 setTargetDAGCombine(ISD::SRA);
1705 setTargetDAGCombine(ISD::SRL);
1706 setTargetDAGCombine(ISD::OR);
1707 setTargetDAGCombine(ISD::AND);
1708 setTargetDAGCombine(ISD::ADD);
1709 setTargetDAGCombine(ISD::FADD);
1710 setTargetDAGCombine(ISD::FSUB);
1711 setTargetDAGCombine(ISD::FNEG);
1712 setTargetDAGCombine(ISD::FMA);
1713 setTargetDAGCombine(ISD::FMINNUM);
1714 setTargetDAGCombine(ISD::FMAXNUM);
1715 setTargetDAGCombine(ISD::SUB);
1716 setTargetDAGCombine(ISD::LOAD);
1717 setTargetDAGCombine(ISD::MLOAD);
1718 setTargetDAGCombine(ISD::STORE);
1719 setTargetDAGCombine(ISD::MSTORE);
1720 setTargetDAGCombine(ISD::TRUNCATE);
1721 setTargetDAGCombine(ISD::ZERO_EXTEND);
1722 setTargetDAGCombine(ISD::ANY_EXTEND);
1723 setTargetDAGCombine(ISD::SIGN_EXTEND);
1724 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1725 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1726 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1727 setTargetDAGCombine(ISD::SINT_TO_FP);
1728 setTargetDAGCombine(ISD::UINT_TO_FP);
1729 setTargetDAGCombine(ISD::SETCC);
1730 setTargetDAGCombine(ISD::MUL);
1731 setTargetDAGCombine(ISD::XOR);
1732 setTargetDAGCombine(ISD::MSCATTER);
1733 setTargetDAGCombine(ISD::MGATHER);
1734
1735 computeRegisterProperties(Subtarget.getRegisterInfo());
1736
1737 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1738 MaxStoresPerMemsetOptSize = 8;
1739 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1740 MaxStoresPerMemcpyOptSize = 4;
1741 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1742 MaxStoresPerMemmoveOptSize = 4;
1743 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1744 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1745
1746 // An out-of-order CPU can speculatively execute past a predictable branch,
1747 // but a conditional move could be stalled by an expensive earlier operation.
1748 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1749 EnableExtLdPromotion = true;
1750 setPrefFunctionAlignment(4); // 2^4 bytes.
1751
1752 verifyIntrinsicTables();
1753}
1754
1755// This has so far only been implemented for 64-bit MachO.
1756bool X86TargetLowering::useLoadStackGuardNode() const {
1757 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1758}
1759
1760TargetLoweringBase::LegalizeTypeAction
1761X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1762 if (ExperimentalVectorWideningLegalization &&
1763 VT.getVectorNumElements() != 1 &&
1764 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1765 return TypeWidenVector;
1766
1767 return TargetLoweringBase::getPreferredVectorAction(VT);
1768}
1769
1770EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1771 LLVMContext& Context,
1772 EVT VT) const {
1773 if (!VT.isVector())
1774 return Subtarget.hasAVX512() ? MVT::i1: MVT::i8;
1775
1776 if (VT.isSimple()) {
1777 MVT VVT = VT.getSimpleVT();
1778 const unsigned NumElts = VVT.getVectorNumElements();
1779 MVT EltVT = VVT.getVectorElementType();
1780 if (VVT.is512BitVector()) {
1781 if (Subtarget.hasAVX512())
1782 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1783 EltVT == MVT::f32 || EltVT == MVT::f64)
1784 switch(NumElts) {
1785 case 8: return MVT::v8i1;
1786 case 16: return MVT::v16i1;
1787 }
1788 if (Subtarget.hasBWI())
1789 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1790 switch(NumElts) {
1791 case 32: return MVT::v32i1;
1792 case 64: return MVT::v64i1;
1793 }
1794 }
1795
1796 if (Subtarget.hasBWI() && Subtarget.hasVLX())
1797 return MVT::getVectorVT(MVT::i1, NumElts);
1798
1799 if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) {
1800 EVT LegalVT = getTypeToTransformTo(Context, VT);
1801 EltVT = LegalVT.getVectorElementType().getSimpleVT();
1802 }
1803
1804 if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32)
1805 switch(NumElts) {
1806 case 2: return MVT::v2i1;
1807 case 4: return MVT::v4i1;
1808 case 8: return MVT::v8i1;
1809 }
1810 }
1811
1812 return VT.changeVectorElementTypeToInteger();
1813}
1814
1815/// Helper for getByValTypeAlignment to determine
1816/// the desired ByVal argument alignment.
1817static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1818 if (MaxAlign == 16)
1819 return;
1820 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1821 if (VTy->getBitWidth() == 128)
1822 MaxAlign = 16;
1823 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1824 unsigned EltAlign = 0;
1825 getMaxByValAlign(ATy->getElementType(), EltAlign);
1826 if (EltAlign > MaxAlign)
1827 MaxAlign = EltAlign;
1828 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1829 for (auto *EltTy : STy->elements()) {
1830 unsigned EltAlign = 0;
1831 getMaxByValAlign(EltTy, EltAlign);
1832 if (EltAlign > MaxAlign)
1833 MaxAlign = EltAlign;
1834 if (MaxAlign == 16)
1835 break;
1836 }
1837 }
1838}
1839
1840/// Return the desired alignment for ByVal aggregate
1841/// function arguments in the caller parameter area. For X86, aggregates
1842/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1843/// are at 4-byte boundaries.
1844unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1845 const DataLayout &DL) const {
1846 if (Subtarget.is64Bit()) {
1847 // Max of 8 and alignment of type.
1848 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1849 if (TyAlign > 8)
1850 return TyAlign;
1851 return 8;
1852 }
1853
1854 unsigned Align = 4;
1855 if (Subtarget.hasSSE1())
1856 getMaxByValAlign(Ty, Align);
1857 return Align;
1858}
1859
1860/// Returns the target specific optimal type for load
1861/// and store operations as a result of memset, memcpy, and memmove
1862/// lowering. If DstAlign is zero that means it's safe to destination
1863/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1864/// means there isn't a need to check it against alignment requirement,
1865/// probably because the source does not need to be loaded. If 'IsMemset' is
1866/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1867/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1868/// source is constant so it does not need to be loaded.
1869/// It returns EVT::Other if the type should be determined using generic
1870/// target-independent logic.
1871EVT
1872X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1873 unsigned DstAlign, unsigned SrcAlign,
1874 bool IsMemset, bool ZeroMemset,
1875 bool MemcpyStrSrc,
1876 MachineFunction &MF) const {
1877 const Function *F = MF.getFunction();
1878 if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
1879 if (Size >= 16 &&
1880 (!Subtarget.isUnalignedMem16Slow() ||
1881 ((DstAlign == 0 || DstAlign >= 16) &&
1882 (SrcAlign == 0 || SrcAlign >= 16)))) {
1883 // FIXME: Check if unaligned 32-byte accesses are slow.
1884 if (Size >= 32 && Subtarget.hasAVX()) {
1885 // Although this isn't a well-supported type for AVX1, we'll let
1886 // legalization and shuffle lowering produce the optimal codegen. If we
1887 // choose an optimal type with a vector element larger than a byte,
1888 // getMemsetStores() may create an intermediate splat (using an integer
1889 // multiply) before we splat as a vector.
1890 return MVT::v32i8;
1891 }
1892 if (Subtarget.hasSSE2())
1893 return MVT::v16i8;
1894 // TODO: Can SSE1 handle a byte vector?
1895 if (Subtarget.hasSSE1())
1896 return MVT::v4f32;
1897 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1898 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1899 // Do not use f64 to lower memcpy if source is string constant. It's
1900 // better to use i32 to avoid the loads.
1901 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1902 // The gymnastics of splatting a byte value into an XMM register and then
1903 // only using 8-byte stores (because this is a CPU with slow unaligned
1904 // 16-byte accesses) makes that a loser.
1905 return MVT::f64;
1906 }
1907 }
1908 // This is a compromise. If we reach here, unaligned accesses may be slow on
1909 // this target. However, creating smaller, aligned accesses could be even
1910 // slower and would certainly be a lot more code.
1911 if (Subtarget.is64Bit() && Size >= 8)
1912 return MVT::i64;
1913 return MVT::i32;
1914}
1915
1916bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1917 if (VT == MVT::f32)
1918 return X86ScalarSSEf32;
1919 else if (VT == MVT::f64)
1920 return X86ScalarSSEf64;
1921 return true;
1922}
1923
1924bool
1925X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1926 unsigned,
1927 unsigned,
1928 bool *Fast) const {
1929 if (Fast) {
1930 switch (VT.getSizeInBits()) {
1931 default:
1932 // 8-byte and under are always assumed to be fast.
1933 *Fast = true;
1934 break;
1935 case 128:
1936 *Fast = !Subtarget.isUnalignedMem16Slow();
1937 break;
1938 case 256:
1939 *Fast = !Subtarget.isUnalignedMem32Slow();
1940 break;
1941 // TODO: What about AVX-512 (512-bit) accesses?
1942 }
1943 }
1944 // Misaligned accesses of any size are always allowed.
1945 return true;
1946}
1947
1948/// Return the entry encoding for a jump table in the
1949/// current function. The returned value is a member of the
1950/// MachineJumpTableInfo::JTEntryKind enum.
1951unsigned X86TargetLowering::getJumpTableEncoding() const {
1952 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1953 // symbol.
1954 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1955 return MachineJumpTableInfo::EK_Custom32;
1956
1957 // Otherwise, use the normal jump table encoding heuristics.
1958 return TargetLowering::getJumpTableEncoding();
1959}
1960
1961bool X86TargetLowering::useSoftFloat() const {
1962 return Subtarget.useSoftFloat();
1963}
1964
1965void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1966 ArgListTy &Args) const {
1967
1968 // Only relabel X86-32 for C / Stdcall CCs.
1969 if (static_cast<const X86Subtarget &>(MF->getSubtarget()).is64Bit())
1970 return;
1971 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
1972 return;
1973 unsigned ParamRegs = 0;
1974 if (auto *M = MF->getFunction()->getParent())
1975 ParamRegs = M->getNumberRegisterParameters();
1976
1977 // Mark the first N int arguments as having reg
1978 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
1979 Type *T = Args[Idx].Ty;
1980 if (T->isPointerTy() || T->isIntegerTy())
1981 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
1982 unsigned numRegs = 1;
1983 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
1984 numRegs = 2;
1985 if (ParamRegs < numRegs)
1986 return;
1987 ParamRegs -= numRegs;
1988 Args[Idx].IsInReg = true;
1989 }
1990 }
1991}
1992
1993const MCExpr *
1994X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1995 const MachineBasicBlock *MBB,
1996 unsigned uid,MCContext &Ctx) const{
1997 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 1997, __PRETTY_FUNCTION__))
;
1998 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1999 // entries.
2000 return MCSymbolRefExpr::create(MBB->getSymbol(),
2001 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2002}
2003
2004/// Returns relocation base for the given PIC jumptable.
2005SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2006 SelectionDAG &DAG) const {
2007 if (!Subtarget.is64Bit())
2008 // This doesn't have SDLoc associated with it, but is not really the
2009 // same as a Register.
2010 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2011 getPointerTy(DAG.getDataLayout()));
2012 return Table;
2013}
2014
2015/// This returns the relocation base for the given PIC jumptable,
2016/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2017const MCExpr *X86TargetLowering::
2018getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2019 MCContext &Ctx) const {
2020 // X86-64 uses RIP relative addressing based on the jump table label.
2021 if (Subtarget.isPICStyleRIPRel())
2022 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2023
2024 // Otherwise, the reference is relative to the PIC base.
2025 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2026}
2027
2028std::pair<const TargetRegisterClass *, uint8_t>
2029X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2030 MVT VT) const {
2031 const TargetRegisterClass *RRC = nullptr;
2032 uint8_t Cost = 1;
2033 switch (VT.SimpleTy) {
2034 default:
2035 return TargetLowering::findRepresentativeClass(TRI, VT);
2036 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2037 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2038 break;
2039 case MVT::x86mmx:
2040 RRC = &X86::VR64RegClass;
2041 break;
2042 case MVT::f32: case MVT::f64:
2043 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2044 case MVT::v4f32: case MVT::v2f64:
2045 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2046 case MVT::v8f32: case MVT::v4f64:
2047 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2048 case MVT::v16f32: case MVT::v8f64:
2049 RRC = &X86::VR128XRegClass;
2050 break;
2051 }
2052 return std::make_pair(RRC, Cost);
2053}
2054
2055unsigned X86TargetLowering::getAddressSpace() const {
2056 if (Subtarget.is64Bit())
2057 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2058 return 256;
2059}
2060
2061static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2062 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2063 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2064}
2065
2066static Constant* SegmentOffset(IRBuilder<> &IRB,
2067 unsigned Offset, unsigned AddressSpace) {
2068 return ConstantExpr::getIntToPtr(
2069 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2070 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2071}
2072
2073Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2074 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2075 // tcbhead_t; use it instead of the usual global variable (see
2076 // sysdeps/{i386,x86_64}/nptl/tls.h)
2077 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2078 if (Subtarget.isTargetFuchsia()) {
2079 // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
2080 return SegmentOffset(IRB, 0x10, getAddressSpace());
2081 } else {
2082 // %fs:0x28, unless we're using a Kernel code model, in which case
2083 // it's %gs:0x28. gs:0x14 on i386.
2084 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2085 return SegmentOffset(IRB, Offset, getAddressSpace());
2086 }
2087 }
2088
2089 return TargetLowering::getIRStackGuard(IRB);
2090}
2091
2092void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2093 // MSVC CRT provides functionalities for stack protection.
2094 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
2095 // MSVC CRT has a global variable holding security cookie.
2096 M.getOrInsertGlobal("__security_cookie",
2097 Type::getInt8PtrTy(M.getContext()));
2098
2099 // MSVC CRT has a function to validate security cookie.
2100 auto *SecurityCheckCookie = cast<Function>(
2101 M.getOrInsertFunction("__security_check_cookie",
2102 Type::getVoidTy(M.getContext()),
2103 Type::getInt8PtrTy(M.getContext()), nullptr));
2104 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2105 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2106 return;
2107 }
2108 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2109 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2110 return;
2111 TargetLowering::insertSSPDeclarations(M);
2112}
2113
2114Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2115 // MSVC CRT has a global variable holding security cookie.
2116 if (Subtarget.getTargetTriple().isOSMSVCRT())
2117 return M.getGlobalVariable("__security_cookie");
2118 return TargetLowering::getSDagStackGuard(M);
2119}
2120
2121Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2122 // MSVC CRT has a function to validate security cookie.
2123 if (Subtarget.getTargetTriple().isOSMSVCRT())
2124 return M.getFunction("__security_check_cookie");
2125 return TargetLowering::getSSPStackGuardCheck(M);
2126}
2127
2128Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2129 if (Subtarget.getTargetTriple().isOSContiki())
2130 return getDefaultSafeStackPointerLocation(IRB, false);
2131
2132 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2133 // definition of TLS_SLOT_SAFESTACK in
2134 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2135 if (Subtarget.isTargetAndroid()) {
2136 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2137 // %gs:0x24 on i386
2138 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2139 return SegmentOffset(IRB, Offset, getAddressSpace());
2140 }
2141
2142 // Fuchsia is similar.
2143 if (Subtarget.isTargetFuchsia()) {
2144 // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
2145 return SegmentOffset(IRB, 0x18, getAddressSpace());
2146 }
2147
2148 return TargetLowering::getSafeStackPointerLocation(IRB);
2149}
2150
2151bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2152 unsigned DestAS) const {
2153 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2153, __PRETTY_FUNCTION__))
;
2154
2155 return SrcAS < 256 && DestAS < 256;
2156}
2157
2158//===----------------------------------------------------------------------===//
2159// Return Value Calling Convention Implementation
2160//===----------------------------------------------------------------------===//
2161
2162#include "X86GenCallingConv.inc"
2163
2164bool X86TargetLowering::CanLowerReturn(
2165 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2166 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2167 SmallVector<CCValAssign, 16> RVLocs;
2168 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2169 return CCInfo.CheckReturn(Outs, RetCC_X86);
2170}
2171
2172const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2173 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2174 return ScratchRegs;
2175}
2176
2177/// Lowers masks values (v*i1) to the local register values
2178/// \returns DAG node after lowering to register type
2179static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2180 const SDLoc &Dl, SelectionDAG &DAG) {
2181 EVT ValVT = ValArg.getValueType();
2182
2183 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2184 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2185 // Two stage lowering might be required
2186 // bitcast: v8i1 -> i8 / v16i1 -> i16
2187 // anyextend: i8 -> i32 / i16 -> i32
2188 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2189 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2190 if (ValLoc == MVT::i32)
2191 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2192 return ValToCopy;
2193 } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2194 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2195 // One stage lowering is required
2196 // bitcast: v32i1 -> i32 / v64i1 -> i64
2197 return DAG.getBitcast(ValLoc, ValArg);
2198 } else
2199 return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
2200}
2201
2202/// Breaks v64i1 value into two registers and adds the new node to the DAG
2203static void Passv64i1ArgInRegs(
2204 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2205 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2206 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2207 assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&(((Subtarget.hasBWI() || Subtarget.hasBMI()) && "Expected AVX512BW or AVX512BMI target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI() || Subtarget.hasBMI()) && \"Expected AVX512BW or AVX512BMI target!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2208, __PRETTY_FUNCTION__))
2208 "Expected AVX512BW or AVX512BMI target!")(((Subtarget.hasBWI() || Subtarget.hasBMI()) && "Expected AVX512BW or AVX512BMI target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI() || Subtarget.hasBMI()) && \"Expected AVX512BW or AVX512BMI target!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2208, __PRETTY_FUNCTION__))
;
2209 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2209, __PRETTY_FUNCTION__))
;
2210 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2210, __PRETTY_FUNCTION__))
;
2211 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2212, __PRETTY_FUNCTION__))
2212 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2212, __PRETTY_FUNCTION__))
;
2213
2214 // Before splitting the value we cast it to i64
2215 Arg = DAG.getBitcast(MVT::i64, Arg);
2216
2217 // Splitting the value into two i32 types
2218 SDValue Lo, Hi;
2219 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2220 DAG.getConstant(0, Dl, MVT::i32));
2221 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2222 DAG.getConstant(1, Dl, MVT::i32));
2223
2224 // Attach the two i32 types into corresponding registers
2225 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2226 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2227}
2228
2229SDValue
2230X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2231 bool isVarArg,
2232 const SmallVectorImpl<ISD::OutputArg> &Outs,
2233 const SmallVectorImpl<SDValue> &OutVals,
2234 const SDLoc &dl, SelectionDAG &DAG) const {
2235 MachineFunction &MF = DAG.getMachineFunction();
2236 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2237
2238 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2239 report_fatal_error("X86 interrupts may not return any value");
2240
2241 SmallVector<CCValAssign, 16> RVLocs;
2242 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2243 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2244
2245 SDValue Flag;
2246 SmallVector<SDValue, 6> RetOps;
2247 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2248 // Operand #1 = Bytes To Pop
2249 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2250 MVT::i32));
2251
2252 // Copy the result values into the output registers.
2253 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2254 ++I, ++OutsIndex) {
2255 CCValAssign &VA = RVLocs[I];
2256 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2256, __PRETTY_FUNCTION__))
;
2257
2258 // Add the register to the CalleeSaveDisableRegs list.
2259 if (CallConv == CallingConv::X86_RegCall)
2260 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2261
2262 SDValue ValToCopy = OutVals[OutsIndex];
2263 EVT ValVT = ValToCopy.getValueType();
2264
2265 // Promote values to the appropriate types.
2266 if (VA.getLocInfo() == CCValAssign::SExt)
2267 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2268 else if (VA.getLocInfo() == CCValAssign::ZExt)
2269 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2270 else if (VA.getLocInfo() == CCValAssign::AExt) {
2271 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2272 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2273 else
2274 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2275 }
2276 else if (VA.getLocInfo() == CCValAssign::BCvt)
2277 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2278
2279 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2280, __PRETTY_FUNCTION__))
2280 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2280, __PRETTY_FUNCTION__))
;
2281
2282 // If this is x86-64, and we disabled SSE, we can't return FP values,
2283 // or SSE or MMX vectors.
2284 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2285 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2286 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2287 report_fatal_error("SSE register return with SSE disabled");
2288 }
2289 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2290 // llvm-gcc has never done it right and no one has noticed, so this
2291 // should be OK for now.
2292 if (ValVT == MVT::f64 &&
2293 (Subtarget.is64Bit() && !Subtarget.hasSSE2()))
2294 report_fatal_error("SSE2 register return with SSE2 disabled");
2295
2296 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2297 // the RET instruction and handled by the FP Stackifier.
2298 if (VA.getLocReg() == X86::FP0 ||
2299 VA.getLocReg() == X86::FP1) {
2300 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2301 // change the value to the FP stack register class.
2302 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2303 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2304 RetOps.push_back(ValToCopy);
2305 // Don't emit a copytoreg.
2306 continue;
2307 }
2308
2309 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2310 // which is returned in RAX / RDX.
2311 if (Subtarget.is64Bit()) {
2312 if (ValVT == MVT::x86mmx) {
2313 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2314 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2315 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2316 ValToCopy);
2317 // If we don't have SSE2 available, convert to v4f32 so the generated
2318 // register is legal.
2319 if (!Subtarget.hasSSE2())
2320 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2321 }
2322 }
2323 }
2324
2325 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2326
2327 if (VA.needsCustom()) {
2328 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2329, __PRETTY_FUNCTION__))
2329 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2329, __PRETTY_FUNCTION__))
;
2330
2331 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2332 Subtarget);
2333
2334 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2335, __PRETTY_FUNCTION__))
2335 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2335, __PRETTY_FUNCTION__))
;
2336
2337 // Add the second register to the CalleeSaveDisableRegs list.
2338 if (CallConv == CallingConv::X86_RegCall)
2339 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2340 } else {
2341 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2342 }
2343
2344 // Add nodes to the DAG and add the values into the RetOps list
2345 for (auto &Reg : RegsToPass) {
2346 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2347 Flag = Chain.getValue(1);
2348 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2349 }
2350 }
2351
2352 // Swift calling convention does not require we copy the sret argument
2353 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2354
2355 // All x86 ABIs require that for returning structs by value we copy
2356 // the sret argument into %rax/%eax (depending on ABI) for the return.
2357 // We saved the argument into a virtual register in the entry block,
2358 // so now we copy the value out and into %rax/%eax.
2359 //
2360 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2361 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2362 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2363 // either case FuncInfo->setSRetReturnReg() will have been called.
2364 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2365 // When we have both sret and another return value, we should use the
2366 // original Chain stored in RetOps[0], instead of the current Chain updated
2367 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2368
2369 // For the case of sret and another return value, we have
2370 // Chain_0 at the function entry
2371 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2372 // If we use Chain_1 in getCopyFromReg, we will have
2373 // Val = getCopyFromReg(Chain_1)
2374 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2375
2376 // getCopyToReg(Chain_0) will be glued together with
2377 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2378 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2379 // Data dependency from Unit B to Unit A due to usage of Val in
2380 // getCopyToReg(Chain_1, Val)
2381 // Chain dependency from Unit A to Unit B
2382
2383 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2384 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2385 getPointerTy(MF.getDataLayout()));
2386
2387 unsigned RetValReg
2388 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2389 X86::RAX : X86::EAX;
2390 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2391 Flag = Chain.getValue(1);
2392
2393 // RAX/EAX now acts like a return value.
2394 RetOps.push_back(
2395 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2396
2397 // Add the returned register to the CalleeSaveDisableRegs list.
2398 if (CallConv == CallingConv::X86_RegCall)
2399 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2400 }
2401
2402 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2403 const MCPhysReg *I =
2404 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2405 if (I) {
2406 for (; *I; ++I) {
2407 if (X86::GR64RegClass.contains(*I))
2408 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2409 else
2410 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2410)
;
2411 }
2412 }
2413
2414 RetOps[0] = Chain; // Update chain.
2415
2416 // Add the flag if we have it.
2417 if (Flag.getNode())
2418 RetOps.push_back(Flag);
2419
2420 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2421 if (CallConv == CallingConv::X86_INTR)
2422 opcode = X86ISD::IRET;
2423 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2424}
2425
2426bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2427 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2428 return false;
2429
2430 SDValue TCChain = Chain;
2431 SDNode *Copy = *N->use_begin();
2432 if (Copy->getOpcode() == ISD::CopyToReg) {
2433 // If the copy has a glue operand, we conservatively assume it isn't safe to
2434 // perform a tail call.
2435 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2436 return false;
2437 TCChain = Copy->getOperand(0);
2438 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2439 return false;
2440
2441 bool HasRet = false;
2442 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2443 UI != UE; ++UI) {
2444 if (UI->getOpcode() != X86ISD::RET_FLAG)
2445 return false;
2446 // If we are returning more than one value, we can definitely
2447 // not make a tail call see PR19530
2448 if (UI->getNumOperands() > 4)
2449 return false;
2450 if (UI->getNumOperands() == 4 &&
2451 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2452 return false;
2453 HasRet = true;
2454 }
2455
2456 if (!HasRet)
2457 return false;
2458
2459 Chain = TCChain;
2460 return true;
2461}
2462
2463EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2464 ISD::NodeType ExtendKind) const {
2465 MVT ReturnMVT = MVT::i32;
2466
2467 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2468 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2469 // The ABI does not require i1, i8 or i16 to be extended.
2470 //
2471 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2472 // always extending i8/i16 return values, so keep doing that for now.
2473 // (PR26665).
2474 ReturnMVT = MVT::i8;
2475 }
2476
2477 EVT MinVT = getRegisterType(Context, ReturnMVT);
2478 return VT.bitsLT(MinVT) ? MinVT : VT;
2479}
2480
2481/// Reads two 32 bit registers and creates a 64 bit mask value.
2482/// \param VA The current 32 bit value that need to be assigned.
2483/// \param NextVA The next 32 bit value that need to be assigned.
2484/// \param Root The parent DAG node.
2485/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2486/// glue purposes. In the case the DAG is already using
2487/// physical register instead of virtual, we should glue
2488/// our new SDValue to InFlag SDvalue.
2489/// \return a new SDvalue of size 64bit.
2490static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2491 SDValue &Root, SelectionDAG &DAG,
2492 const SDLoc &Dl, const X86Subtarget &Subtarget,
2493 SDValue *InFlag = nullptr) {
2494 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2494, __PRETTY_FUNCTION__))
;
2495 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2495, __PRETTY_FUNCTION__))
;
2496 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2497, __PRETTY_FUNCTION__))
2497 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2497, __PRETTY_FUNCTION__))
;
2498 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2499, __PRETTY_FUNCTION__))
2499 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2499, __PRETTY_FUNCTION__))
;
2500 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2501, __PRETTY_FUNCTION__))
2501 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2501, __PRETTY_FUNCTION__))
;
2502
2503 SDValue Lo, Hi;
2504 unsigned Reg;
2505 SDValue ArgValueLo, ArgValueHi;
2506
2507 MachineFunction &MF = DAG.getMachineFunction();
2508 const TargetRegisterClass *RC = &X86::GR32RegClass;
2509
2510 // Read a 32 bit value from the registers
2511 if (nullptr == InFlag) {
2512 // When no physical register is present,
2513 // create an intermediate virtual register
2514 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2515 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2516 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2517 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2518 } else {
2519 // When a physical register is available read the value from it and glue
2520 // the reads together.
2521 ArgValueLo =
2522 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2523 *InFlag = ArgValueLo.getValue(2);
2524 ArgValueHi =
2525 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2526 *InFlag = ArgValueHi.getValue(2);
2527 }
2528
2529 // Convert the i32 type into v32i1 type
2530 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2531
2532 // Convert the i32 type into v32i1 type
2533 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2534
2535 // Concantenate the two values together
2536 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2537}
2538
2539/// The function will lower a register of various sizes (8/16/32/64)
2540/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2541/// \returns a DAG node contains the operand after lowering to mask type.
2542static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2543 const EVT &ValLoc, const SDLoc &Dl,
2544 SelectionDAG &DAG) {
2545 SDValue ValReturned = ValArg;
2546
2547 if (ValVT == MVT::v64i1) {
2548 // In 32 bit machine, this case is handled by getv64i1Argument
2549 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2549, __PRETTY_FUNCTION__))
;
2550 // In 64 bit machine, There is no need to truncate the value only bitcast
2551 } else {
2552 MVT maskLen;
2553 switch (ValVT.getSimpleVT().SimpleTy) {
2554 case MVT::v8i1:
2555 maskLen = MVT::i8;
2556 break;
2557 case MVT::v16i1:
2558 maskLen = MVT::i16;
2559 break;
2560 case MVT::v32i1:
2561 maskLen = MVT::i32;
2562 break;
2563 default:
2564 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2564)
;
2565 }
2566
2567 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2568 }
2569
2570 return DAG.getBitcast(ValVT, ValReturned);
2571}
2572
2573/// Lower the result values of a call into the
2574/// appropriate copies out of appropriate physical registers.
2575///
2576SDValue X86TargetLowering::LowerCallResult(
2577 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2578 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2579 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2580 uint32_t *RegMask) const {
2581
2582 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2583 // Assign locations to each value returned by this call.
2584 SmallVector<CCValAssign, 16> RVLocs;
2585 bool Is64Bit = Subtarget.is64Bit();
2586 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2587 *DAG.getContext());
2588 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2589
2590 // Copy all of the result registers out of their specified physreg.
2591 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2592 ++I, ++InsIndex) {
2593 CCValAssign &VA = RVLocs[I];
2594 EVT CopyVT = VA.getLocVT();
2595
2596 // In some calling conventions we need to remove the used registers
2597 // from the register mask.
2598 if (RegMask && CallConv == CallingConv::X86_RegCall) {
2599 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2600 SubRegs.isValid(); ++SubRegs)
2601 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2602 }
2603
2604 // If this is x86-64, and we disabled SSE, we can't return FP values
2605 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2606 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2607 report_fatal_error("SSE register return with SSE disabled");
2608 }
2609
2610 // If we prefer to use the value in xmm registers, copy it out as f80 and
2611 // use a truncate to move it from fp stack reg to xmm reg.
2612 bool RoundAfterCopy = false;
2613 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2614 isScalarFPTypeInSSEReg(VA.getValVT())) {
2615 if (!Subtarget.hasX87())
2616 report_fatal_error("X87 register return with X87 disabled");
2617 CopyVT = MVT::f80;
2618 RoundAfterCopy = (CopyVT != VA.getLocVT());
2619 }
2620
2621 SDValue Val;
2622 if (VA.needsCustom()) {
2623 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2624, __PRETTY_FUNCTION__))
2624 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2624, __PRETTY_FUNCTION__))
;
2625 Val =
2626 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2627 } else {
2628 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2629 .getValue(1);
2630 Val = Chain.getValue(0);
2631 InFlag = Chain.getValue(2);
2632 }
2633
2634 if (RoundAfterCopy)
2635 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2636 // This truncation won't change the value.
2637 DAG.getIntPtrConstant(1, dl));
2638
2639 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2640 if (VA.getValVT().isVector() &&
2641 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2642 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2643 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2644 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2645 } else
2646 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2647 }
2648
2649 InVals.push_back(Val);
2650 }
2651
2652 return Chain;
2653}
2654
2655//===----------------------------------------------------------------------===//
2656// C & StdCall & Fast Calling Convention implementation
2657//===----------------------------------------------------------------------===//
// The StdCall calling convention is the standard for many Windows API
// routines. It differs from the C calling convention only slightly: the
// callee cleans up the stack rather than the caller, and symbol names are
// decorated. It does not support any vector arguments.
// For information on the fast calling convention, see the Fast Calling
// Convention (tail call) implementation, LowerX86_32FastCCCallTo.
2664
/// Classifies how a call or function handles a struct-return (sret) argument.
/// Produced by callIsStructReturn and argsAreStructReturn.
enum StructReturnType {
  /// There are no arguments, or the first argument is not marked sret.
  NotStructReturn,
  /// The first argument is sret and is marked inreg, or the target is MCU.
  RegStructReturn,
  /// The first argument is sret and neither of the above conditions holds.
  StackStructReturn
};
2672static StructReturnType
2673callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2674 if (Outs.empty())
2675 return NotStructReturn;
2676
2677 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2678 if (!Flags.isSRet())
2679 return NotStructReturn;
2680 if (Flags.isInReg() || IsMCU)
2681 return RegStructReturn;
2682 return StackStructReturn;
2683}
2684
2685/// Determines whether a function uses struct return semantics.
2686static StructReturnType
2687argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2688 if (Ins.empty())
2689 return NotStructReturn;
2690
2691 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2692 if (!Flags.isSRet())
2693 return NotStructReturn;
2694 if (Flags.isInReg() || IsMCU)
2695 return RegStructReturn;
2696 return StackStructReturn;
2697}
2698
2699/// Make a copy of an aggregate at address specified by "Src" to address
2700/// "Dst" with size and alignment information specified by the specific
2701/// parameter attribute. The copy will be passed as a byval function parameter.
2702static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2703 SDValue Chain, ISD::ArgFlagsTy Flags,
2704 SelectionDAG &DAG, const SDLoc &dl) {
2705 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2706
2707 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2708 /*isVolatile*/false, /*AlwaysInline=*/true,
2709 /*isTailCall*/false,
2710 MachinePointerInfo(), MachinePointerInfo());
2711}
2712
2713/// Return true if the calling convention is one that we can guarantee TCO for.
2714static bool canGuaranteeTCO(CallingConv::ID CC) {
2715 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2716 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2717 CC == CallingConv::HHVM);
2718}
2719
2720/// Return true if we might ever do TCO for calls with this calling convention.
2721static bool mayTailCallThisCC(CallingConv::ID CC) {
2722 switch (CC) {
2723 // C calling conventions:
2724 case CallingConv::C:
2725 case CallingConv::X86_64_Win64:
2726 case CallingConv::X86_64_SysV:
2727 // Callee pop conventions:
2728 case CallingConv::X86_ThisCall:
2729 case CallingConv::X86_StdCall:
2730 case CallingConv::X86_VectorCall:
2731 case CallingConv::X86_FastCall:
2732 return true;
2733 default:
2734 return canGuaranteeTCO(CC);
2735 }
2736}
2737
2738/// Return true if the function is being made into a tailcall target by
2739/// changing its ABI.
2740static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2741 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2742}
2743
2744bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2745 auto Attr =
2746 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2747 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2748 return false;
2749
2750 CallSite CS(CI);
2751 CallingConv::ID CalleeCC = CS.getCallingConv();
2752 if (!mayTailCallThisCC(CalleeCC))
2753 return false;
2754
2755 return true;
2756}
2757
2758SDValue
2759X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2760 const SmallVectorImpl<ISD::InputArg> &Ins,
2761 const SDLoc &dl, SelectionDAG &DAG,
2762 const CCValAssign &VA,
2763 MachineFrameInfo &MFI, unsigned i) const {
2764 // Create the nodes corresponding to a load from this parameter slot.
2765 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2766 bool AlwaysUseMutable = shouldGuaranteeTCO(
2767 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2768 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2769 EVT ValVT;
2770 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2771
2772 // If value is passed by pointer we have address passed instead of the value
2773 // itself. No need to extend if the mask value and location share the same
2774 // absolute size.
2775 bool ExtendedInMem =
2776 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2777 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2778
2779 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2780 ValVT = VA.getLocVT();
2781 else
2782 ValVT = VA.getValVT();
2783
2784 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2785 // taken by a return address.
2786 int Offset = 0;
2787 if (CallConv == CallingConv::X86_INTR) {
2788 const X86Subtarget& Subtarget =
2789 static_cast<const X86Subtarget&>(DAG.getSubtarget());
2790 // X86 interrupts may take one or two arguments.
2791 // On the stack there will be no return address as in regular call.
2792 // Offset of last argument need to be set to -4/-8 bytes.
2793 // Where offset of the first argument out of two, should be set to 0 bytes.
2794 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2795 }
2796
2797 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2798 // changed with more analysis.
2799 // In case of tail call optimization mark all arguments mutable. Since they
2800 // could be overwritten by lowering of arguments in case of a tail call.
2801 if (Flags.isByVal()) {
2802 unsigned Bytes = Flags.getByValSize();
2803 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2804 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2805 // Adjust SP offset of interrupt parameter.
2806 if (CallConv == CallingConv::X86_INTR) {
2807 MFI.setObjectOffset(FI, Offset);
2808 }
2809 return DAG.getFrameIndex(FI, PtrVT);
2810 }
2811
2812 // This is an argument in memory. We might be able to perform copy elision.
2813 if (Flags.isCopyElisionCandidate()) {
2814 EVT ArgVT = Ins[i].ArgVT;
2815 SDValue PartAddr;
2816 if (Ins[i].PartOffset == 0) {
2817 // If this is a one-part value or the first part of a multi-part value,
2818 // create a stack object for the entire argument value type and return a
2819 // load from our portion of it. This assumes that if the first part of an
2820 // argument is in memory, the rest will also be in memory.
2821 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2822 /*Immutable=*/false);
2823 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2824 return DAG.getLoad(
2825 ValVT, dl, Chain, PartAddr,
2826 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2827 } else {
2828 // This is not the first piece of an argument in memory. See if there is
2829 // already a fixed stack object including this offset. If so, assume it
2830 // was created by the PartOffset == 0 branch above and create a load from
2831 // the appropriate offset into it.
2832 int64_t PartBegin = VA.getLocMemOffset();
2833 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2834 int FI = MFI.getObjectIndexBegin();
2835 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2836 int64_t ObjBegin = MFI.getObjectOffset(FI);
2837 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2838 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2839 break;
2840 }
2841 if (MFI.isFixedObjectIndex(FI)) {
2842 SDValue Addr =
2843 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2844 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2845 return DAG.getLoad(
2846 ValVT, dl, Chain, Addr,
2847 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2848 Ins[i].PartOffset));
2849 }
2850 }
2851 }
2852
2853 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2854 VA.getLocMemOffset(), isImmutable);
2855
2856 // Set SExt or ZExt flag.
2857 if (VA.getLocInfo() == CCValAssign::ZExt) {
2858 MFI.setObjectZExt(FI, true);
2859 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2860 MFI.setObjectSExt(FI, true);
2861 }
2862
2863 // Adjust SP offset of interrupt parameter.
2864 if (CallConv == CallingConv::X86_INTR) {
2865 MFI.setObjectOffset(FI, Offset);
2866 }
2867
2868 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2869 SDValue Val = DAG.getLoad(
2870 ValVT, dl, Chain, FIN,
2871 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2872 return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
2873 : Val;
2874}
2875
2876// FIXME: Get this from tablegen.
2877static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2878 const X86Subtarget &Subtarget) {
2879 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2879, __PRETTY_FUNCTION__))
;
2880
2881 if (Subtarget.isCallingConvWin64(CallConv)) {
2882 static const MCPhysReg GPR64ArgRegsWin64[] = {
2883 X86::RCX, X86::RDX, X86::R8, X86::R9
2884 };
2885 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2886 }
2887
2888 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2889 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2890 };
2891 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2892}
2893
2894// FIXME: Get this from tablegen.
2895static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2896 CallingConv::ID CallConv,
2897 const X86Subtarget &Subtarget) {
2898 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2898, __PRETTY_FUNCTION__))
;
2899 if (Subtarget.isCallingConvWin64(CallConv)) {
2900 // The XMM registers which might contain var arg parameters are shadowed
2901 // in their paired GPR. So we only need to save the GPR to their home
2902 // slots.
2903 // TODO: __vectorcall will change this.
2904 return None;
2905 }
2906
2907 const Function *Fn = MF.getFunction();
2908 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2909 bool isSoftFloat = Subtarget.useSoftFloat();
2910 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2911, __PRETTY_FUNCTION__))
2911 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2911, __PRETTY_FUNCTION__))
;
2912 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2913 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2914 // registers.
2915 return None;
2916
2917 static const MCPhysReg XMMArgRegs64Bit[] = {
2918 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2919 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2920 };
2921 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2922}
2923
2924#ifndef NDEBUG
2925static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
2926 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2927 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2928 return A.getValNo() < B.getValNo();
2929 });
2930}
2931#endif
2932
2933SDValue X86TargetLowering::LowerFormalArguments(
2934 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2935 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2936 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2937 MachineFunction &MF = DAG.getMachineFunction();
2938 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2939 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
2940
2941 const Function *Fn = MF.getFunction();
2942 if (Fn->hasExternalLinkage() &&
2943 Subtarget.isTargetCygMing() &&
2944 Fn->getName() == "main")
2945 FuncInfo->setForceFramePointer(true);
2946
2947 MachineFrameInfo &MFI = MF.getFrameInfo();
2948 bool Is64Bit = Subtarget.is64Bit();
2949 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2950
2951 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2953, __PRETTY_FUNCTION__))
2952 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2953, __PRETTY_FUNCTION__))
2953 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2953, __PRETTY_FUNCTION__))
;
2954
2955 if (CallConv == CallingConv::X86_INTR) {
2956 bool isLegal = Ins.size() == 1 ||
2957 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2958 (!Is64Bit && Ins[1].VT == MVT::i32)));
2959 if (!isLegal)
2960 report_fatal_error("X86 interrupts may take one or two arguments");
2961 }
2962
2963 // Assign locations to all of the incoming arguments.
2964 SmallVector<CCValAssign, 16> ArgLocs;
2965 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2966
2967 // Allocate shadow area for Win64.
2968 if (IsWin64)
2969 CCInfo.AllocateStack(32, 8);
2970
2971 CCInfo.AnalyzeArguments(Ins, CC_X86);
2972
2973 // In vectorcall calling convention a second pass is required for the HVA
2974 // types.
2975 if (CallingConv::X86_VectorCall == CallConv) {
2976 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
2977 }
2978
2979 // The next loop assumes that the locations are in the same order of the
2980 // input arguments.
2981 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2982, __PRETTY_FUNCTION__))
2982 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2982, __PRETTY_FUNCTION__))
;
2983
2984 SDValue ArgValue;
2985 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
2986 ++I, ++InsIndex) {
2987 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2987, __PRETTY_FUNCTION__))
;
2988 CCValAssign &VA = ArgLocs[I];
2989
2990 if (VA.isRegLoc()) {
2991 EVT RegVT = VA.getLocVT();
2992 if (VA.needsCustom()) {
2993 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2995, __PRETTY_FUNCTION__))
2994 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2995, __PRETTY_FUNCTION__))
2995 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 2995, __PRETTY_FUNCTION__))
;
2996
2997 // v64i1 values, in regcall calling convention, that are
2998 // compiled to 32 bit arch, are splited up into two registers.
2999 ArgValue =
3000 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3001 } else {
3002 const TargetRegisterClass *RC;
3003 if (RegVT == MVT::i32)
3004 RC = &X86::GR32RegClass;
3005 else if (Is64Bit && RegVT == MVT::i64)
3006 RC = &X86::GR64RegClass;
3007 else if (RegVT == MVT::f32)
3008 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3009 else if (RegVT == MVT::f64)
3010 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3011 else if (RegVT == MVT::f80)
3012 RC = &X86::RFP80RegClass;
3013 else if (RegVT == MVT::f128)
3014 RC = &X86::FR128RegClass;
3015 else if (RegVT.is512BitVector())
3016 RC = &X86::VR512RegClass;
3017 else if (RegVT.is256BitVector())
3018 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3019 else if (RegVT.is128BitVector())
3020 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3021 else if (RegVT == MVT::x86mmx)
3022 RC = &X86::VR64RegClass;
3023 else if (RegVT == MVT::i1)
3024 RC = &X86::VK1RegClass;
3025 else if (RegVT == MVT::v8i1)
3026 RC = &X86::VK8RegClass;
3027 else if (RegVT == MVT::v16i1)
3028 RC = &X86::VK16RegClass;
3029 else if (RegVT == MVT::v32i1)
3030 RC = &X86::VK32RegClass;
3031 else if (RegVT == MVT::v64i1)
3032 RC = &X86::VK64RegClass;
3033 else
3034 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3034)
;
3035
3036 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3037 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3038 }
3039
3040 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3041 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3042 // right size.
3043 if (VA.getLocInfo() == CCValAssign::SExt)
3044 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3045 DAG.getValueType(VA.getValVT()));
3046 else if (VA.getLocInfo() == CCValAssign::ZExt)
3047 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3048 DAG.getValueType(VA.getValVT()));
3049 else if (VA.getLocInfo() == CCValAssign::BCvt)
3050 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3051
3052 if (VA.isExtInLoc()) {
3053 // Handle MMX values passed in XMM regs.
3054 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3055 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3056 else if (VA.getValVT().isVector() &&
3057 VA.getValVT().getScalarType() == MVT::i1 &&
3058 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3059 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3060 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3061 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3062 } else
3063 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3064 }
3065 } else {
3066 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3066, __PRETTY_FUNCTION__))
;
3067 ArgValue =
3068 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3069 }
3070
3071 // If value is passed via pointer - do a load.
3072 if (VA.getLocInfo() == CCValAssign::Indirect)
3073 ArgValue =
3074 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3075
3076 InVals.push_back(ArgValue);
3077 }
3078
3079 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3080 // Swift calling convention does not require we copy the sret argument
3081 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3082 if (CallConv == CallingConv::Swift)
3083 continue;
3084
3085 // All x86 ABIs require that for returning structs by value we copy the
3086 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3087 // the argument into a virtual register so that we can access it from the
3088 // return points.
3089 if (Ins[I].Flags.isSRet()) {
3090 unsigned Reg = FuncInfo->getSRetReturnReg();
3091 if (!Reg) {
3092 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3093 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3094 FuncInfo->setSRetReturnReg(Reg);
3095 }
3096 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3097 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3098 break;
3099 }
3100 }
3101
3102 unsigned StackSize = CCInfo.getNextStackOffset();
3103 // Align stack specially for tail calls.
3104 if (shouldGuaranteeTCO(CallConv,
3105 MF.getTarget().Options.GuaranteedTailCallOpt))
3106 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3107
3108 // If the function takes variable number of arguments, make a frame index for
3109 // the start of the first vararg value... for expansion of llvm.va_start. We
3110 // can skip this if there are no va_start calls.
3111 if (MFI.hasVAStart() &&
3112 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3113 CallConv != CallingConv::X86_ThisCall))) {
3114 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3115 }
3116
3117 // Figure out if XMM registers are in use.
3118 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3120, __PRETTY_FUNCTION__))
3119 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3120, __PRETTY_FUNCTION__))
3120 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3120, __PRETTY_FUNCTION__))
;
3121
3122 // 64-bit calling conventions support varargs and register parameters, so we
3123 // have to do extra work to spill them in the prologue.
3124 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3125 // Find the first unallocated argument registers.
3126 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3127 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3128 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3129 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3130 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3131, __PRETTY_FUNCTION__))
3131 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3131, __PRETTY_FUNCTION__))
;
3132
3133 // Gather all the live in physical registers.
3134 SmallVector<SDValue, 6> LiveGPRs;
3135 SmallVector<SDValue, 8> LiveXMMRegs;
3136 SDValue ALVal;
3137 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3138 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3139 LiveGPRs.push_back(
3140 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3141 }
3142 if (!ArgXMMs.empty()) {
3143 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3144 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3145 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3146 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3147 LiveXMMRegs.push_back(
3148 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3149 }
3150 }
3151
3152 if (IsWin64) {
3153 // Get to the caller-allocated home save location. Add 8 to account
3154 // for the return address.
3155 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3156 FuncInfo->setRegSaveFrameIndex(
3157 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3158 // Fixup to set vararg frame on shadow area (4 x i64).
3159 if (NumIntRegs < 4)
3160 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3161 } else {
3162 // For X86-64, if there are vararg parameters that are passed via
3163 // registers, then we must store them to their spots on the stack so
3164 // they may be loaded by dereferencing the result of va_next.
3165 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3166 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3167 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3168 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3169 }
3170
3171 // Store the integer parameter registers.
3172 SmallVector<SDValue, 8> MemOps;
3173 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3174 getPointerTy(DAG.getDataLayout()));
3175 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3176 for (SDValue Val : LiveGPRs) {
3177 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3178 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3179 SDValue Store =
3180 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3181 MachinePointerInfo::getFixedStack(
3182 DAG.getMachineFunction(),
3183 FuncInfo->getRegSaveFrameIndex(), Offset));
3184 MemOps.push_back(Store);
3185 Offset += 8;
3186 }
3187
3188 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3189 // Now store the XMM (fp + vector) parameter registers.
3190 SmallVector<SDValue, 12> SaveXMMOps;
3191 SaveXMMOps.push_back(Chain);
3192 SaveXMMOps.push_back(ALVal);
3193 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3194 FuncInfo->getRegSaveFrameIndex(), dl));
3195 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3196 FuncInfo->getVarArgsFPOffset(), dl));
3197 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3198 LiveXMMRegs.end());
3199 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3200 MVT::Other, SaveXMMOps));
3201 }
3202
3203 if (!MemOps.empty())
3204 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3205 }
3206
3207 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3208 // Find the largest legal vector type.
3209 MVT VecVT = MVT::Other;
3210 // FIXME: Only some x86_32 calling conventions support AVX512.
3211 if (Subtarget.hasAVX512() &&
3212 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3213 CallConv == CallingConv::Intel_OCL_BI)))
3214 VecVT = MVT::v16f32;
3215 else if (Subtarget.hasAVX())
3216 VecVT = MVT::v8f32;
3217 else if (Subtarget.hasSSE2())
3218 VecVT = MVT::v4f32;
3219
3220 // We forward some GPRs and some vector types.
3221 SmallVector<MVT, 2> RegParmTypes;
3222 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3223 RegParmTypes.push_back(IntVT);
3224 if (VecVT != MVT::Other)
3225 RegParmTypes.push_back(VecVT);
3226
3227 // Compute the set of forwarded registers. The rest are scratch.
3228 SmallVectorImpl<ForwardedRegister> &Forwards =
3229 FuncInfo->getForwardedMustTailRegParms();
3230 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3231
3232 // Conservatively forward AL on x86_64, since it might be used for varargs.
3233 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3234 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3235 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3236 }
3237
3238 // Copy all forwards from physical to virtual registers.
3239 for (ForwardedRegister &F : Forwards) {
3240 // FIXME: Can we use a less constrained schedule?
3241 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3242 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3243 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3244 }
3245 }
3246
3247 // Some CCs need callee pop.
3248 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3249 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3250 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3251 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3252 // X86 interrupts must pop the error code if present
3253 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 8 : 4);
3254 } else {
3255 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3256 // If this is an sret function, the return should pop the hidden pointer.
3257 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3258 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3259 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3260 FuncInfo->setBytesToPopOnReturn(4);
3261 }
3262
3263 if (!Is64Bit) {
3264 // RegSaveFrameIndex is X86-64 only.
3265 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3266 if (CallConv == CallingConv::X86_FastCall ||
3267 CallConv == CallingConv::X86_ThisCall)
3268 // fastcc functions can't have varargs.
3269 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3270 }
3271
3272 FuncInfo->setArgumentStackSize(StackSize);
3273
3274 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3275 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
3276 if (Personality == EHPersonality::CoreCLR) {
3277 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3277, __PRETTY_FUNCTION__))
;
3278 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3279 // that we'd prefer this slot be allocated towards the bottom of the frame
3280 // (i.e. near the stack pointer after allocating the frame). Every
3281 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3282 // offset from the bottom of this and each funclet's frame must be the
3283 // same, so the size of funclets' (mostly empty) frames is dictated by
3284 // how far this slot is from the bottom (since they allocate just enough
3285 // space to accommodate holding this slot at the correct offset).
3286 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3287 EHInfo->PSPSymFrameIdx = PSPSymFI;
3288 }
3289 }
3290
3291 if (CallConv == CallingConv::X86_RegCall) {
3292 const MachineRegisterInfo &MRI = MF.getRegInfo();
3293 for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
3294 MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
3295 }
3296
3297 return Chain;
3298}
3299
3300SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3301 SDValue Arg, const SDLoc &dl,
3302 SelectionDAG &DAG,
3303 const CCValAssign &VA,
3304 ISD::ArgFlagsTy Flags) const {
3305 unsigned LocMemOffset = VA.getLocMemOffset();
3306 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3307 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3308 StackPtr, PtrOff);
3309 if (Flags.isByVal())
3310 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3311
3312 return DAG.getStore(
3313 Chain, dl, Arg, PtrOff,
3314 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3315}
3316
3317/// Emit a load of return address if tail call
3318/// optimization is performed and it is required.
3319SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3320 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3321 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3322 // Adjust the Return address stack slot.
3323 EVT VT = getPointerTy(DAG.getDataLayout());
3324 OutRetAddr = getReturnAddressFrameIndex(DAG);
3325
3326 // Load the "old" Return address.
3327 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3328 return SDValue(OutRetAddr.getNode(), 1);
3329}
3330
3331/// Emit a store of the return address if tail call
3332/// optimization is performed and it is required (FPDiff!=0).
3333static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3334 SDValue Chain, SDValue RetAddrFrIdx,
3335 EVT PtrVT, unsigned SlotSize,
3336 int FPDiff, const SDLoc &dl) {
3337 // Store the return address to the appropriate stack slot.
3338 if (!FPDiff) return Chain;
3339 // Calculate the new stack slot for the return address.
3340 int NewReturnAddrFI =
3341 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3342 false);
3343 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3344 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3345 MachinePointerInfo::getFixedStack(
3346 DAG.getMachineFunction(), NewReturnAddrFI));
3347 return Chain;
3348}
3349
3350/// Returns a vector_shuffle mask for an movs{s|d}, movd
3351/// operation of specified width.
3352static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3353 SDValue V2) {
3354 unsigned NumElems = VT.getVectorNumElements();
3355 SmallVector<int, 8> Mask;
3356 Mask.push_back(NumElems);
3357 for (unsigned i = 1; i != NumElems; ++i)
3358 Mask.push_back(i);
3359 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3360}
3361
/// Lower the outgoing call described by \p CLI into SelectionDAG nodes.
///
/// In order, the function:
///  1. decides whether the call is emitted as a tail call / sibcall (this may
///     rewrite CLI.IsTailCall through the isTailCall reference);
///  2. assigns a location to every outgoing operand with CC_X86;
///  3. opens the call sequence (CALLSEQ_START) unless this is a sibcall;
///  4. extends/bitcasts/spills each argument as its CCValAssign requires and
///     queues it either as a register copy (RegsToPass) or a stack store
///     (MemOpChains);
///  5. adds the implicit register operands (EBX for GOT-style PIC, AL for
///     64-bit non-Win64 varargs, forwarded musttail registers);
///  6. for non-sibcall tail calls, stores memory arguments into their final
///     fixed slots and re-stores the return address;
///  7. emits the glued CopyToReg chain, rewrites the callee into a target
///     node where applicable, and appends the call-preserved register mask;
///  8. emits X86ISD::TC_RETURN (tail call) or X86ISD::CALL followed by
///     CALLSEQ_END and LowerCallResult.
///
/// \param CLI     Call description (DAG, chain, callee, arguments, flags).
/// \param InVals  Passed through to LowerCallResult on the non-tail-call
///                path.
/// \returns The TC_RETURN node for tail calls, otherwise the result of
///          LowerCallResult.
3362SDValue
3363X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3364 SmallVectorImpl<SDValue> &InVals) const {
3365 SelectionDAG &DAG = CLI.DAG;
3366 SDLoc &dl = CLI.DL;
3367 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3368 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3369 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3370 SDValue Chain = CLI.Chain;
3371 SDValue Callee = CLI.Callee;
3372 CallingConv::ID CallConv = CLI.CallConv;
3373 bool &isTailCall = CLI.IsTailCall;
3374 bool isVarArg = CLI.IsVarArg;
3375
3376 MachineFunction &MF = DAG.getMachineFunction();
3377 bool Is64Bit = Subtarget.is64Bit();
3378 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3379 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3380 bool IsSibcall = false;
3381 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3382 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
3383
3384 if (CallConv == CallingConv::X86_INTR)
3385 report_fatal_error("X86 interrupts may not be called directly");
3386
// The caller's "disable-tail-calls" attribute clears the tail-call request
// (a musttail call still forces it back on further down).
3387 if (Attr.getValueAsString() == "true")
3388 isTailCall = false;
3389
3390 if (Subtarget.isPICStyleGOT() &&
3391 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3392 // If we are using a GOT, disable tail calls to external symbols with
3393 // default visibility. Tail calling such a symbol requires using a GOT
3394 // relocation, which forces early binding of the symbol. This breaks code
3395 // that require lazy function symbol resolution. Using musttail or
3396 // GuaranteedTailCallOpt will override this.
3397 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3398 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3399 G->getGlobal()->hasDefaultVisibility()))
3400 isTailCall = false;
3401 }
3402
3403 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
3404 if (IsMustTail) {
3405 // Force this to be a tail call. The verifier rules are enough to ensure
3406 // that we can lower this successfully without moving the return address
3407 // around.
3408 isTailCall = true;
3409 } else if (isTailCall) {
3410 // Check if it's really possible to do a tail call.
3411 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3412 isVarArg, SR != NotStructReturn,
3413 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3414 Outs, OutVals, Ins, DAG);
3415
3416 // Sibcalls are automatically detected tailcalls which do not require
3417 // ABI changes.
3418 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3419 IsSibcall = true;
3420
3421 if (isTailCall)
3422 ++NumTailCalls;
3423 }
3424
3425 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3426, __PRETTY_FUNCTION__))
3426 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3426, __PRETTY_FUNCTION__))
;
3427
3428 // Analyze operands of the call, assigning locations to each operand.
3429 SmallVector<CCValAssign, 16> ArgLocs;
3430 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3431
3432 // Allocate shadow area for Win64.
3433 if (IsWin64)
3434 CCInfo.AllocateStack(32, 8);
3435
3436 CCInfo.AnalyzeArguments(Outs, CC_X86);
3437
3438 // In vectorcall calling convention a second pass is required for the HVA
3439 // types.
3440 if (CallingConv::X86_VectorCall == CallConv) {
3441 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3442 }
3443
3444 // Get a count of how many bytes are to be pushed on the stack.
3445 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3446 if (IsSibcall)
3447 // This is a sibcall. The memory operands are available in caller's
3448 // own caller's stack.
3449 NumBytes = 0;
3450 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3451 canGuaranteeTCO(CallConv))
3452 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3453
// FPDiff is the number of bytes the caller pops on return minus the bytes
// this call needs; it stays 0 for sibcalls, musttail calls and normal calls.
3454 int FPDiff = 0;
3455 if (isTailCall && !IsSibcall && !IsMustTail) {
3456 // Lower arguments at fp - stackoffset + fpdiff.
3457 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3458
3459 FPDiff = NumBytesCallerPushed - NumBytes;
3460
3461 // Set the delta of movement of the returnaddr stackslot.
3462 // But only set if delta is greater than previous delta.
3463 if (FPDiff < X86Info->getTCReturnAddrDelta())
3464 X86Info->setTCReturnAddrDelta(FPDiff);
3465 }
3466
3467 unsigned NumBytesToPush = NumBytes;
3468 unsigned NumBytesToPop = NumBytes;
3469
3470 // If we have an inalloca argument, all stack space has already been allocated
3471 // for us and be right at the top of the stack. We don't support multiple
3472 // arguments passed in memory when using inalloca.
3473 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3474 NumBytesToPush = 0;
3475 if (!ArgLocs.back().isMemLoc())
3476 report_fatal_error("cannot use inalloca attribute on a register "
3477 "parameter");
3478 if (ArgLocs.back().getLocMemOffset() != 0)
3479 report_fatal_error("any parameter with the inalloca attribute must be "
3480 "the only memory argument");
3481 }
3482
3483 if (!IsSibcall)
3484 Chain = DAG.getCALLSEQ_START(
3485 Chain, DAG.getIntPtrConstant(NumBytesToPush, dl, true), dl);
3486
3487 SDValue RetAddrFrIdx;
3488 // Load return address for tail calls.
3489 if (isTailCall && FPDiff)
3490 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3491 Is64Bit, FPDiff, dl);
3492
3493 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3494 SmallVector<SDValue, 8> MemOpChains;
3495 SDValue StackPtr;
3496
3497 // The next loop assumes that the locations are in the same order of the
3498 // input arguments.
3499 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3500, __PRETTY_FUNCTION__))
3500 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3500, __PRETTY_FUNCTION__))
;
3501
3502 // Walk the register/memloc assignments, inserting copies/loads. In the case
3503 // of tail call optimization arguments are handle later.
3504 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3505 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3506 ++I, ++OutIndex) {
3507 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3507, __PRETTY_FUNCTION__))
;
3508 // Skip inalloca arguments, they have already been written.
3509 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3510 if (Flags.isInAlloca())
3511 continue;
3512
3513 CCValAssign &VA = ArgLocs[I];
3514 EVT RegVT = VA.getLocVT();
3515 SDValue Arg = OutVals[OutIndex];
3516 bool isByVal = Flags.isByVal();
3517
3518 // Promote the value if needed.
3519 switch (VA.getLocInfo()) {
3520 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3520)
;
3521 case CCValAssign::Full: break;
3522 case CCValAssign::SExt:
3523 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3524 break;
3525 case CCValAssign::ZExt:
3526 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3527 break;
3528 case CCValAssign::AExt:
3529 if (Arg.getValueType().isVector() &&
3530 Arg.getValueType().getVectorElementType() == MVT::i1)
3531 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3532 else if (RegVT.is128BitVector()) {
3533 // Special case: passing MMX values in XMM registers.
3534 Arg = DAG.getBitcast(MVT::i64, Arg);
3535 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3536 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3537 } else
3538 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3539 break;
3540 case CCValAssign::BCvt:
3541 Arg = DAG.getBitcast(RegVT, Arg);
3542 break;
3543 case CCValAssign::Indirect: {
3544 // Store the argument.
3545 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3546 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3547 Chain = DAG.getStore(
3548 Chain, dl, Arg, SpillSlot,
3549 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3550 Arg = SpillSlot;
3551 break;
3552 }
3553 }
3554
3555 if (VA.needsCustom()) {
3556 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3557, __PRETTY_FUNCTION__))
3557 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3557, __PRETTY_FUNCTION__))
;
// The ++I below consumes the second ArgLocs entry belonging to this same
// outgoing value; OutIndex is advanced only once by the loop header.
3558 // Split v64i1 value into two registers
3559 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3560 Subtarget);
3561 } else if (VA.isRegLoc()) {
3562 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3563 if (isVarArg && IsWin64) {
3564 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3565 // shadow reg if callee is a varargs function.
3566 unsigned ShadowReg = 0;
3567 switch (VA.getLocReg()) {
3568 case X86::XMM0: ShadowReg = X86::RCX; break;
3569 case X86::XMM1: ShadowReg = X86::RDX; break;
3570 case X86::XMM2: ShadowReg = X86::R8; break;
3571 case X86::XMM3: ShadowReg = X86::R9; break;
3572 }
3573 if (ShadowReg)
3574 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3575 }
3576 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3577 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3577, __PRETTY_FUNCTION__))
;
3578 if (!StackPtr.getNode())
3579 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3580 getPointerTy(DAG.getDataLayout()));
3581 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3582 dl, DAG, VA, Flags));
3583 }
3584 }
3585
3586 if (!MemOpChains.empty())
3587 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3588
3589 if (Subtarget.isPICStyleGOT()) {
3590 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3591 // GOT pointer.
3592 if (!isTailCall) {
3593 RegsToPass.push_back(std::make_pair(
3594 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3595 getPointerTy(DAG.getDataLayout()))));
3596 } else {
3597 // If we are tail calling and generating PIC/GOT style code load the
3598 // address of the callee into ECX. The value in ecx is used as target of
3599 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3600 // for tail calls on PIC/GOT architectures. Normally we would just put the
3601 // address of GOT into ebx and then call target@PLT. But for tail calls
3602 // ebx would be restored (since ebx is callee saved) before jumping to the
3603 // target@PLT.
3604
3605 // Note: The actual moving to ECX is done further down.
3606 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3607 if (G && !G->getGlobal()->hasLocalLinkage() &&
3608 G->getGlobal()->hasDefaultVisibility())
3609 Callee = LowerGlobalAddress(Callee, DAG);
3610 else if (isa<ExternalSymbolSDNode>(Callee))
3611 Callee = LowerExternalSymbol(Callee, DAG);
3612 }
3613 }
3614
3615 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3616 // From AMD64 ABI document:
3617 // For calls that may call functions that use varargs or stdargs
3618 // (prototype-less calls or calls to functions containing ellipsis (...) in
3619 // the declaration) %al is used as hidden argument to specify the number
3620 // of SSE registers used. The contents of %al do not need to match exactly
3621 // the number of registers, but must be an ubound on the number of SSE
3622 // registers used and is in the range 0 - 8 inclusive.
3623
3624 // Count the number of XMM registers allocated.
3625 static const MCPhysReg XMMArgRegs[] = {
3626 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3627 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3628 };
3629 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3630 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3631, __PRETTY_FUNCTION__))
3631 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3631, __PRETTY_FUNCTION__))
;
3632
3633 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3634 DAG.getConstant(NumXMMRegs, dl,
3635 MVT::i8)));
3636 }
3637
// Variadic musttail calls pass along every register recorded in
// getForwardedMustTailRegParms() by copying it from its virtual register.
3638 if (isVarArg && IsMustTail) {
3639 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3640 for (const auto &F : Forwards) {
3641 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3642 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3643 }
3644 }
3645
3646 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3647 // don't need this because the eligibility check rejects calls that require
3648 // shuffling arguments passed in memory.
3649 if (!IsSibcall && isTailCall) {
3650 // Force all the incoming stack arguments to be loaded from the stack
3651 // before any new outgoing arguments are stored to the stack, because the
3652 // outgoing stack slots may alias the incoming argument stack slots, and
3653 // the alias isn't otherwise explicit. This is slightly more conservative
3654 // than necessary, because it means that each store effectively depends
3655 // on every argument instead of just those arguments it would clobber.
3656 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3657
3658 SmallVector<SDValue, 8> MemOpChains2;
3659 SDValue FIN;
3660 int FI = 0;
3661 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3662 ++I, ++OutsIndex) {
3663 CCValAssign &VA = ArgLocs[I];
3664
3665 if (VA.isRegLoc()) {
3666 if (VA.needsCustom()) {
3667 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custome case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custome case only in regcall calling convention\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3668, __PRETTY_FUNCTION__))
3668 "Expecting custome case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custome case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custome case only in regcall calling convention\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3668, __PRETTY_FUNCTION__))
;
3669 // This means that we are in special case where one argument was
3670 // passed through two register locations - Skip the next location
3671 ++I;
3672 }
3673
3674 continue;
3675 }
3676
3677 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3677, __PRETTY_FUNCTION__))
;
3678 SDValue Arg = OutVals[OutsIndex];
3679 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3680 // Skip inalloca arguments. They don't require any work.
3681 if (Flags.isInAlloca())
3682 continue;
3683 // Create frame index.
3684 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3685 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3686 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3687 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3688
3689 if (Flags.isByVal()) {
3690 // Copy relative to framepointer.
3691 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3692 if (!StackPtr.getNode())
3693 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3694 getPointerTy(DAG.getDataLayout()));
3695 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3696 StackPtr, Source);
3697
3698 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3699 ArgChain,
3700 Flags, DAG, dl));
3701 } else {
3702 // Store relative to framepointer.
3703 MemOpChains2.push_back(DAG.getStore(
3704 ArgChain, dl, Arg, FIN,
3705 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3706 }
3707 }
3708
3709 if (!MemOpChains2.empty())
3710 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3711
3712 // Store the return address to the appropriate stack slot.
3713 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3714 getPointerTy(DAG.getDataLayout()),
3715 RegInfo->getSlotSize(), FPDiff, dl);
3716 }
3717
3718 // Build a sequence of copy-to-reg nodes chained together with token chain
3719 // and flag operands which copy the outgoing args into registers.
3720 SDValue InFlag;
3721 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3722 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3723 RegsToPass[i].second, InFlag);
3724 InFlag = Chain.getValue(1);
3725 }
3726
3727 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3728 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3728, __PRETTY_FUNCTION__))
;
3729 // In the 64-bit large code model, we have to make all calls
3730 // through a register, since the call instruction's 32-bit
3731 // pc-relative offset may not be large enough to hold the whole
3732 // address.
3733 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3734 // If the callee is a GlobalAddress node (quite common, every direct call
3735 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3736 // it.
3737 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3738
3739 // We should use extra load for direct calls to dllimported functions in
3740 // non-JIT mode.
3741 const GlobalValue *GV = G->getGlobal();
3742 if (!GV->hasDLLImportStorageClass()) {
3743 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3744
3745 Callee = DAG.getTargetGlobalAddress(
3746 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3747
3748 if (OpFlags == X86II::MO_GOTPCREL) {
3749 // Add a wrapper.
3750 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3751 getPointerTy(DAG.getDataLayout()), Callee);
3752 // Add extra indirection
3753 Callee = DAG.getLoad(
3754 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3755 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3756 }
3757 }
3758 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3759 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
3760 unsigned char OpFlags =
3761 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3762
3763 Callee = DAG.getTargetExternalSymbol(
3764 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3765 } else if (Subtarget.isTarget64BitILP32() &&
3766 Callee->getValueType(0) == MVT::i32) {
3767 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3768 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3769 }
3770
3771 // Returns a chain & a flag for retval copy to use.
3772 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3773 SmallVector<SDValue, 8> Ops;
3774
// For non-sibcall tail calls the call sequence is closed here, before the
// TC_RETURN node is built, because that path returns without reaching the
// CALLSEQ_END emitted after X86ISD::CALL.
3775 if (!IsSibcall && isTailCall) {
3776 Chain = DAG.getCALLSEQ_END(Chain,
3777 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3778 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3779 InFlag = Chain.getValue(1);
3780 }
3781
3782 Ops.push_back(Chain);
3783 Ops.push_back(Callee);
3784
3785 if (isTailCall)
3786 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3787
3788 // Add argument registers to the end of the list so that they are known live
3789 // into the call.
3790 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3791 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3792 RegsToPass[i].second.getValueType()));
3793
3794 // Add a register mask operand representing the call-preserved registers.
3795 const uint32_t *Mask = RegInfo->getCallPreservedMask(MF, CallConv);
3796 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 3796, __PRETTY_FUNCTION__))
;
3797
3798 // If this is an invoke in a 32-bit function using a funclet-based
3799 // personality, assume the function clobbers all registers. If an exception
3800 // is thrown, the runtime will not restore CSRs.
3801 // FIXME: Model this more precisely so that we can register allocate across
3802 // the normal edge and spill and fill across the exceptional edge.
3803 if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
3804 const Function *CallerFn = MF.getFunction();
3805 EHPersonality Pers =
3806 CallerFn->hasPersonalityFn()
3807 ? classifyEHPersonality(CallerFn->getPersonalityFn())
3808 : EHPersonality::Unknown;
3809 if (isFuncletEHPersonality(Pers))
3810 Mask = RegInfo->getNoPreservedMask();
3811 }
3812
3813 // Define a new register mask from the existing mask.
3814 uint32_t *RegMask = nullptr;
3815
3816 // In some calling conventions we need to remove the used physical registers
3817 // from the reg mask.
3818 if (CallConv == CallingConv::X86_RegCall) {
3819 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3820
3821 // Allocate a new Reg Mask and copy Mask.
3822 RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
3823 unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
3824 memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
3825
3826 // Make sure all sub registers of the argument registers are reset
3827 // in the RegMask.
3828 for (auto const &RegPair : RegsToPass)
3829 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3830 SubRegs.isValid(); ++SubRegs)
3831 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3832
3833 // Create the RegMask Operand according to our updated mask.
3834 Ops.push_back(DAG.getRegisterMask(RegMask));
3835 } else {
3836 // Create the RegMask Operand according to the static mask.
3837 Ops.push_back(DAG.getRegisterMask(Mask));
3838 }
3839
3840 if (InFlag.getNode())
3841 Ops.push_back(InFlag);
3842
3843 if (isTailCall) {
3844 // We used to do:
3845 //// If this is the first return lowered for this function, add the regs
3846 //// to the liveout set for the function.
3847 // This isn't right, although it's probably harmless on x86; liveouts
3848 // should be computed from returns not tail calls. Consider a void
3849 // function making a tail call to a function returning int.
3850 MF.getFrameInfo().setHasTailCall();
3851 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3852 }
3853
3854 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3855 InFlag = Chain.getValue(1);
3856
3857 // Create the CALLSEQ_END node.
3858 unsigned NumBytesForCalleeToPop;
3859 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3860 DAG.getTarget().Options.GuaranteedTailCallOpt))
3861 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3862 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3863 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3864 SR == StackStructReturn)
3865 // If this is a call to a struct-return function, the callee
3866 // pops the hidden struct pointer, so we have to push it back.
3867 // This is common for Darwin/X86, Linux & Mingw32 targets.
3868 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3869 NumBytesForCalleeToPop = 4;
3870 else
3871 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3872
3873 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3874 // No need to reset the stack after the call if the call doesn't return. To
3875 // make the MI verify, we'll pretend the callee does it for us.
3876 NumBytesForCalleeToPop = NumBytes;
3877 }
3878
3879 // Returns a flag for retval copy to use.
3880 if (!IsSibcall) {
3881 Chain = DAG.getCALLSEQ_END(Chain,
3882 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3883 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3884 true),
3885 InFlag, dl);
3886 InFlag = Chain.getValue(1);
3887 }
3888
3889 // Handle result values, copying them out of physregs into vregs that we
3890 // return.
3891 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
3892 InVals, RegMask);
3893}
3894
3895//===----------------------------------------------------------------------===//
3896// Fast Calling Convention (tail call) implementation
3897//===----------------------------------------------------------------------===//
3898
3899// Like stdcall, the callee cleans up the arguments, except that ECX is
3900// reserved for storing the tail called function address. Only 2 registers are
3901// free for argument passing (inreg). Tail call optimization is performed
3902// provided:
3903// * tailcallopt is enabled
3904// * caller/callee are fastcc
3905// On X86_64 architecture with GOT-style position independent code only local
3906// (within module) calls are supported at the moment.
3907// To keep the stack aligned according to platform abi the function
3908// GetAlignedArgumentStackSize ensures that argument delta is always multiples
3909// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
3910// If a tail called function callee has more arguments than the caller the
3911// caller needs to make sure that there is room to move the RETADDR to. This is
3912// achieved by reserving an area the size of the argument delta right after the
3913// original RETADDR, but before the saved framepointer or the spilled registers
3914// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
3915// stack layout:
3916// arg1
3917// arg2
3918// RETADDR
3919// [ new RETADDR
3920// move area ]
3921// (possible EBP)
3922// ESI
3923// EDI
3924// local1 ..
3925
3926/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
3927/// requirement.
3928unsigned
3929X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3930 SelectionDAG& DAG) const {
3931 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3932 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3933 unsigned StackAlignment = TFI.getStackAlignment();
3934 uint64_t AlignMask = StackAlignment - 1;
3935 int64_t Offset = StackSize;
3936 unsigned SlotSize = RegInfo->getSlotSize();
3937 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3938 // Number smaller than 12 so just add the difference.
3939 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3940 } else {
3941 // Mask out lower bits, add stackalignment once plus the 12 bytes.
3942 Offset = ((~AlignMask) & Offset) + StackAlignment +
3943 (StackAlignment-SlotSize);
3944 }
3945 return Offset;
3946}
3947
3948/// Return true if the given stack call argument is already available in the
3949/// same position (relatively) of the caller's incoming argument stack.
3950static
3951bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3952 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
3953 const X86InstrInfo *TII, const CCValAssign &VA) {
3954 unsigned Bytes = Arg.getValueSizeInBits() / 8;
3955
3956 for (;;) {
3957 // Look through nodes that don't alter the bits of the incoming value.
3958 unsigned Op = Arg.getOpcode();
3959 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
3960 Arg = Arg.getOperand(0);
3961 continue;
3962 }
3963 if (Op == ISD::TRUNCATE) {
3964 const SDValue &TruncInput = Arg.getOperand(0);
3965 if (TruncInput.getOpcode() == ISD::AssertZext &&
3966 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
3967 Arg.getValueType()) {
3968 Arg = TruncInput.getOperand(0);
3969 continue;
3970 }
3971 }
3972 break;
3973 }
3974
3975 int FI = INT_MAX2147483647;
3976 if (Arg.getOpcode() == ISD::CopyFromReg) {
3977 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3978 if (!TargetRegisterInfo::isVirtualRegister(VR))
3979 return false;
3980 MachineInstr *Def = MRI->getVRegDef(VR);
3981 if (!Def)
3982 return false;
3983 if (!Flags.isByVal()) {
3984 if (!TII->isLoadFromStackSlot(*Def, FI))
3985 return false;
3986 } else {
3987 unsigned Opcode = Def->getOpcode();
3988 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
3989 Opcode == X86::LEA64_32r) &&
3990 Def->getOperand(1).isFI()) {
3991 FI = Def->getOperand(1).getIndex();
3992 Bytes = Flags.getByValSize();
3993 } else
3994 return false;
3995 }
3996 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3997 if (Flags.isByVal())
3998 // ByVal argument is passed in as a pointer but it's now being
3999 // dereferenced. e.g.
4000 // define @foo(%struct.X* %A) {
4001 // tail call @bar(%struct.X* byval %A)
4002 // }
4003 return false;
4004 SDValue Ptr = Ld->getBasePtr();
4005 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4006 if (!FINode)
4007 return false;
4008 FI = FINode->getIndex();
4009 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4010 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4011 FI = FINode->getIndex();
4012 Bytes = Flags.getByValSize();
4013 } else
4014 return false;
4015
4016 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4016, __PRETTY_FUNCTION__))
;
4017 if (!MFI.isFixedObjectIndex(FI))
4018 return false;
4019
4020 if (Offset != MFI.getObjectOffset(FI))
4021 return false;
4022
4023 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
4024 // If the argument location is wider than the argument type, check that any
4025 // extension flags match.
4026 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4027 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4028 return false;
4029 }
4030 }
4031
4032 return Bytes == MFI.getObjectSize(FI);
4033}
4034
4035/// Check whether the call is eligible for tail call optimization. Targets
4036/// that want to do tail call optimization should implement this function.
4037bool X86TargetLowering::IsEligibleForTailCallOptimization(
4038 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4039 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4040 const SmallVectorImpl<ISD::OutputArg> &Outs,
4041 const SmallVectorImpl<SDValue> &OutVals,
4042 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4043 if (!mayTailCallThisCC(CalleeCC))
4044 return false;
4045
4046 // If -tailcallopt is specified, make fastcc functions tail-callable.
4047 MachineFunction &MF = DAG.getMachineFunction();
4048 const Function *CallerF = MF.getFunction();
4049
4050 // If the function return type is x86_fp80 and the callee return type is not,
4051 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4052 // perform a tailcall optimization here.
4053 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4054 return false;
4055
4056 CallingConv::ID CallerCC = CallerF->getCallingConv();
4057 bool CCMatch = CallerCC == CalleeCC;
4058 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4059 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4060
4061 // Win64 functions have extra shadow space for argument homing. Don't do the
4062 // sibcall if the caller and callee have mismatched expectations for this
4063 // space.
4064 if (IsCalleeWin64 != IsCallerWin64)
4065 return false;
4066
4067 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4068 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4069 return true;
4070 return false;
4071 }
4072
4073 // Look for obvious safe cases to perform tail call optimization that do not
4074 // require ABI changes. This is what gcc calls sibcall.
4075
4076 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4077 // emit a special epilogue.
4078 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4079 if (RegInfo->needsStackRealignment(MF))
4080 return false;
4081
4082 // Also avoid sibcall optimization if either caller or callee uses struct
4083 // return semantics.
4084 if (isCalleeStructRet || isCallerStructRet)
4085 return false;
4086
4087 // Do not sibcall optimize vararg calls unless all arguments are passed via
4088 // registers.
4089 LLVMContext &C = *DAG.getContext();
4090 if (isVarArg && !Outs.empty()) {
4091 // Optimizing for varargs on Win64 is unlikely to be safe without
4092 // additional testing.
4093 if (IsCalleeWin64 || IsCallerWin64)
4094 return false;
4095
4096 SmallVector<CCValAssign, 16> ArgLocs;
4097 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4098
4099 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4100 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4101 if (!ArgLocs[i].isRegLoc())
4102 return false;
4103 }
4104
4105 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4106 // stack. Therefore, if it's not used by the call it is not safe to optimize
4107 // this into a sibcall.
4108 bool Unused = false;
4109 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4110 if (!Ins[i].Used) {
4111 Unused = true;
4112 break;
4113 }
4114 }
4115 if (Unused) {
4116 SmallVector<CCValAssign, 16> RVLocs;
4117 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4118 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4119 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4120 CCValAssign &VA = RVLocs[i];
4121 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4122 return false;
4123 }
4124 }
4125
4126 // Check that the call results are passed in the same way.
4127 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4128 RetCC_X86, RetCC_X86))
4129 return false;
4130 // The callee has to preserve all registers the caller needs to preserve.
4131 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4132 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4133 if (!CCMatch) {
4134 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4135 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4136 return false;
4137 }
4138
4139 unsigned StackArgsSize = 0;
4140
4141 // If the callee takes no arguments then go on to check the results of the
4142 // call.
4143 if (!Outs.empty()) {
4144 // Check if stack adjustment is needed. For now, do not do this if any
4145 // argument is passed on the stack.
4146 SmallVector<CCValAssign, 16> ArgLocs;
4147 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4148
4149 // Allocate shadow area for Win64
4150 if (IsCalleeWin64)
4151 CCInfo.AllocateStack(32, 8);
4152
4153 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4154 StackArgsSize = CCInfo.getNextStackOffset();
4155
4156 if (CCInfo.getNextStackOffset()) {
4157 // Check if the arguments are already laid out in the right way as
4158 // the caller's fixed stack objects.
4159 MachineFrameInfo &MFI = MF.getFrameInfo();
4160 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4161 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4162 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4163 CCValAssign &VA = ArgLocs[i];
4164 SDValue Arg = OutVals[i];
4165 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4166 if (VA.getLocInfo() == CCValAssign::Indirect)
4167 return false;
4168 if (!VA.isRegLoc()) {
4169 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4170 MFI, MRI, TII, VA))
4171 return false;
4172 }
4173 }
4174 }
4175
4176 bool PositionIndependent = isPositionIndependent();
4177 // If the tailcall address may be in a register, then make sure it's
4178 // possible to register allocate for it. In 32-bit, the call address can
4179 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4180 // callee-saved registers are restored. These happen to be the same
4181 // registers used to pass 'inreg' arguments so watch out for those.
4182 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4183 !isa<ExternalSymbolSDNode>(Callee)) ||
4184 PositionIndependent)) {
4185 unsigned NumInRegs = 0;
4186 // In PIC we need an extra register to formulate the address computation
4187 // for the callee.
4188 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4189
4190 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4191 CCValAssign &VA = ArgLocs[i];
4192 if (!VA.isRegLoc())
4193 continue;
4194 unsigned Reg = VA.getLocReg();
4195 switch (Reg) {
4196 default: break;
4197 case X86::EAX: case X86::EDX: case X86::ECX:
4198 if (++NumInRegs == MaxInRegs)
4199 return false;
4200 break;
4201 }
4202 }
4203 }
4204
4205 const MachineRegisterInfo &MRI = MF.getRegInfo();
4206 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4207 return false;
4208 }
4209
4210 bool CalleeWillPop =
4211 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4212 MF.getTarget().Options.GuaranteedTailCallOpt);
4213
4214 if (unsigned BytesToPop =
4215 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4216 // If we have bytes to pop, the callee must pop them.
4217 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4218 if (!CalleePopMatches)
4219 return false;
4220 } else if (CalleeWillPop && StackArgsSize > 0) {
4221 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4222 return false;
4223 }
4224
4225 return true;
4226}
4227
4228FastISel *
4229X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4230 const TargetLibraryInfo *libInfo) const {
4231 return X86::createFastISel(funcInfo, libInfo);
4232}
4233
4234//===----------------------------------------------------------------------===//
4235// Other Lowering Hooks
4236//===----------------------------------------------------------------------===//
4237
4238static bool MayFoldLoad(SDValue Op) {
4239 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4240}
4241
4242static bool MayFoldIntoStore(SDValue Op) {
4243 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4244}
4245
4246static bool MayFoldIntoZeroExtend(SDValue Op) {
4247 if (Op.hasOneUse()) {
4248 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4249 return (ISD::ZERO_EXTEND == Opcode);
4250 }
4251 return false;
4252}
4253
4254static bool isTargetShuffle(unsigned Opcode) {
4255 switch(Opcode) {
4256 default: return false;
4257 case X86ISD::BLENDI:
4258 case X86ISD::PSHUFB:
4259 case X86ISD::PSHUFD:
4260 case X86ISD::PSHUFHW:
4261 case X86ISD::PSHUFLW:
4262 case X86ISD::SHUFP:
4263 case X86ISD::INSERTPS:
4264 case X86ISD::PALIGNR:
4265 case X86ISD::VSHLDQ:
4266 case X86ISD::VSRLDQ:
4267 case X86ISD::MOVLHPS:
4268 case X86ISD::MOVLHPD:
4269 case X86ISD::MOVHLPS:
4270 case X86ISD::MOVLPS:
4271 case X86ISD::MOVLPD:
4272 case X86ISD::MOVSHDUP:
4273 case X86ISD::MOVSLDUP:
4274 case X86ISD::MOVDDUP:
4275 case X86ISD::MOVSS:
4276 case X86ISD::MOVSD:
4277 case X86ISD::UNPCKL:
4278 case X86ISD::UNPCKH:
4279 case X86ISD::VBROADCAST:
4280 case X86ISD::VPERMILPI:
4281 case X86ISD::VPERMILPV:
4282 case X86ISD::VPERM2X128:
4283 case X86ISD::VPERMIL2:
4284 case X86ISD::VPERMI:
4285 case X86ISD::VPPERM:
4286 case X86ISD::VPERMV:
4287 case X86ISD::VPERMV3:
4288 case X86ISD::VPERMIV3:
4289 case X86ISD::VZEXT_MOVL:
4290 return true;
4291 }
4292}
4293
4294static bool isTargetShuffleVariableMask(unsigned Opcode) {
4295 switch (Opcode) {
4296 default: return false;
4297 // Target Shuffles.
4298 case X86ISD::PSHUFB:
4299 case X86ISD::VPERMILPV:
4300 case X86ISD::VPERMIL2:
4301 case X86ISD::VPPERM:
4302 case X86ISD::VPERMV:
4303 case X86ISD::VPERMV3:
4304 case X86ISD::VPERMIV3:
4305 return true;
4306 // 'Faux' Target Shuffles.
4307 case ISD::AND:
4308 case X86ISD::ANDNP:
4309 return true;
4310 }
4311}
4312
4313SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4314 MachineFunction &MF = DAG.getMachineFunction();
4315 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4316 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4317 int ReturnAddrIndex = FuncInfo->getRAIndex();
4318
4319 if (ReturnAddrIndex == 0) {
4320 // Set up a frame object for the return address.
4321 unsigned SlotSize = RegInfo->getSlotSize();
4322 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4323 -(int64_t)SlotSize,
4324 false);
4325 FuncInfo->setRAIndex(ReturnAddrIndex);
4326 }
4327
4328 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4329}
4330
4331bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4332 bool hasSymbolicDisplacement) {
4333 // Offset should fit into 32 bit immediate field.
4334 if (!isInt<32>(Offset))
4335 return false;
4336
4337 // If we don't have a symbolic displacement - we don't have any extra
4338 // restrictions.
4339 if (!hasSymbolicDisplacement)
4340 return true;
4341
4342 // FIXME: Some tweaks might be needed for medium code model.
4343 if (M != CodeModel::Small && M != CodeModel::Kernel)
4344 return false;
4345
4346 // For small code model we assume that latest object is 16MB before end of 31
4347 // bits boundary. We may also accept pretty large negative constants knowing
4348 // that all objects are in the positive half of address space.
4349 if (M == CodeModel::Small && Offset < 16*1024*1024)
4350 return true;
4351
4352 // For kernel code model we know that all object resist in the negative half
4353 // of 32bits address space. We may not accept negative offsets, since they may
4354 // be just off and we may accept pretty large positive ones.
4355 if (M == CodeModel::Kernel && Offset >= 0)
4356 return true;
4357
4358 return false;
4359}
4360
4361/// Determines whether the callee is required to pop its own arguments.
4362/// Callee pop is necessary to support tail calls.
4363bool X86::isCalleePop(CallingConv::ID CallingConv,
4364 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4365 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4366 // can guarantee TCO.
4367 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4368 return true;
4369
4370 switch (CallingConv) {
4371 default:
4372 return false;
4373 case CallingConv::X86_StdCall:
4374 case CallingConv::X86_FastCall:
4375 case CallingConv::X86_ThisCall:
4376 case CallingConv::X86_VectorCall:
4377 return !is64Bit;
4378 }
4379}
4380
4381/// \brief Return true if the condition is an unsigned comparison operation.
4382static bool isX86CCUnsigned(unsigned X86CC) {
4383 switch (X86CC) {
4384 default:
4385 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4385)
;
4386 case X86::COND_E:
4387 case X86::COND_NE:
4388 case X86::COND_B:
4389 case X86::COND_A:
4390 case X86::COND_BE:
4391 case X86::COND_AE:
4392 return true;
4393 case X86::COND_G:
4394 case X86::COND_GE:
4395 case X86::COND_L:
4396 case X86::COND_LE:
4397 return false;
4398 }
4399}
4400
4401static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4402 switch (SetCCOpcode) {
4403 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4403)
;
4404 case ISD::SETEQ: return X86::COND_E;
4405 case ISD::SETGT: return X86::COND_G;
4406 case ISD::SETGE: return X86::COND_GE;
4407 case ISD::SETLT: return X86::COND_L;
4408 case ISD::SETLE: return X86::COND_LE;
4409 case ISD::SETNE: return X86::COND_NE;
4410 case ISD::SETULT: return X86::COND_B;
4411 case ISD::SETUGT: return X86::COND_A;
4412 case ISD::SETULE: return X86::COND_BE;
4413 case ISD::SETUGE: return X86::COND_AE;
4414 }
4415}
4416
4417/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4418/// condition code, returning the condition code and the LHS/RHS of the
4419/// comparison to make.
4420static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4421 bool isFP, SDValue &LHS, SDValue &RHS,
4422 SelectionDAG &DAG) {
4423 if (!isFP) {
4424 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4425 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4426 // X > -1 -> X == 0, jump !sign.
4427 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4428 return X86::COND_NS;
4429 }
4430 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4431 // X < 0 -> X == 0, jump on sign.
4432 return X86::COND_S;
4433 }
4434 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4435 // X < 1 -> X <= 0
4436 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4437 return X86::COND_LE;
4438 }
4439 }
4440
4441 return TranslateIntegerX86CC(SetCCOpcode);
4442 }
4443
4444 // First determine if it is required or is profitable to flip the operands.
4445
4446 // If LHS is a foldable load, but RHS is not, flip the condition.
4447 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4448 !ISD::isNON_EXTLoad(RHS.getNode())) {
4449 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4450 std::swap(LHS, RHS);
4451 }
4452
4453 switch (SetCCOpcode) {
4454 default: break;
4455 case ISD::SETOLT:
4456 case ISD::SETOLE:
4457 case ISD::SETUGT:
4458 case ISD::SETUGE:
4459 std::swap(LHS, RHS);
4460 break;
4461 }
4462
4463 // On a floating point condition, the flags are set as follows:
4464 // ZF PF CF op
4465 // 0 | 0 | 0 | X > Y
4466 // 0 | 0 | 1 | X < Y
4467 // 1 | 0 | 0 | X == Y
4468 // 1 | 1 | 1 | unordered
4469 switch (SetCCOpcode) {
4470 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4470)
;
4471 case ISD::SETUEQ:
4472 case ISD::SETEQ: return X86::COND_E;
4473 case ISD::SETOLT: // flipped
4474 case ISD::SETOGT:
4475 case ISD::SETGT: return X86::COND_A;
4476 case ISD::SETOLE: // flipped
4477 case ISD::SETOGE:
4478 case ISD::SETGE: return X86::COND_AE;
4479 case ISD::SETUGT: // flipped
4480 case ISD::SETULT:
4481 case ISD::SETLT: return X86::COND_B;
4482 case ISD::SETUGE: // flipped
4483 case ISD::SETULE:
4484 case ISD::SETLE: return X86::COND_BE;
4485 case ISD::SETONE:
4486 case ISD::SETNE: return X86::COND_NE;
4487 case ISD::SETUO: return X86::COND_P;
4488 case ISD::SETO: return X86::COND_NP;
4489 case ISD::SETOEQ:
4490 case ISD::SETUNE: return X86::COND_INVALID;
4491 }
4492}
4493
4494/// Is there a floating point cmov for the specific X86 condition code?
4495/// Current x86 isa includes the following FP cmov instructions:
4496/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4497static bool hasFPCMov(unsigned X86CC) {
4498 switch (X86CC) {
4499 default:
4500 return false;
4501 case X86::COND_B:
4502 case X86::COND_BE:
4503 case X86::COND_E:
4504 case X86::COND_P:
4505 case X86::COND_A:
4506 case X86::COND_AE:
4507 case X86::COND_NE:
4508 case X86::COND_NP:
4509 return true;
4510 }
4511}
4512
4513
4514bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4515 const CallInst &I,
4516 unsigned Intrinsic) const {
4517
4518 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4519 if (!IntrData)
4520 return false;
4521
4522 Info.opc = ISD::INTRINSIC_W_CHAIN;
4523 Info.readMem = false;
4524 Info.writeMem = false;
4525 Info.vol = false;
4526 Info.offset = 0;
4527
4528 switch (IntrData->Type) {
4529 case EXPAND_FROM_MEM: {
4530 Info.ptrVal = I.getArgOperand(0);
4531 Info.memVT = MVT::getVT(I.getType());
4532 Info.align = 1;
4533 Info.readMem = true;
4534 break;
4535 }
4536 case COMPRESS_TO_MEM: {
4537 Info.ptrVal = I.getArgOperand(0);
4538 Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
4539 Info.align = 1;
4540 Info.writeMem = true;
4541 break;
4542 }
4543 case TRUNCATE_TO_MEM_VI8:
4544 case TRUNCATE_TO_MEM_VI16:
4545 case TRUNCATE_TO_MEM_VI32: {
4546 Info.ptrVal = I.getArgOperand(0);
4547 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4548 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4549 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4550 ScalarVT = MVT::i8;
4551 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4552 ScalarVT = MVT::i16;
4553 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4554 ScalarVT = MVT::i32;
4555
4556 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4557 Info.align = 1;
4558 Info.writeMem = true;
4559 break;
4560 }
4561 default:
4562 return false;
4563 }
4564
4565 return true;
4566}
4567
4568/// Returns true if the target can instruction select the
4569/// specified FP immediate natively. If false, the legalizer will
4570/// materialize the FP immediate as a load from a constant pool.
4571bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4572 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4573 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4574 return true;
4575 }
4576 return false;
4577}
4578
4579bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4580 ISD::LoadExtType ExtTy,
4581 EVT NewVT) const {
4582 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4583 // relocation target a movq or addq instruction: don't let the load shrink.
4584 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4585 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4586 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4587 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4588 return true;
4589}
4590
4591/// \brief Returns true if it is beneficial to convert a load of a constant
4592/// to just the constant itself.
4593bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4594 Type *Ty) const {
4595 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4595, __PRETTY_FUNCTION__))
;
4596
4597 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4598 if (BitSize == 0 || BitSize > 64)
4599 return false;
4600 return true;
4601}
4602
4603bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
4604 unsigned Index) const {
4605 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4606 return false;
4607
4608 return (Index == 0 || Index == ResVT.getVectorNumElements());
4609}
4610
4611bool X86TargetLowering::isCheapToSpeculateCttz() const {
4612 // Speculate cttz only if we can directly use TZCNT.
4613 return Subtarget.hasBMI();
4614}
4615
4616bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4617 // Speculate ctlz only if we can directly use LZCNT.
4618 return Subtarget.hasLZCNT();
4619}
4620
4621bool X86TargetLowering::isCtlzFast() const {
4622 return Subtarget.hasFastLZCNT();
4623}
4624
4625bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4626 const Instruction &AndI) const {
4627 return true;
4628}
4629
4630bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4631 if (!Subtarget.hasBMI())
4632 return false;
4633
4634 // There are only 32-bit and 64-bit forms for 'andn'.
4635 EVT VT = Y.getValueType();
4636 if (VT != MVT::i32 && VT != MVT::i64)
4637 return false;
4638
4639 return true;
4640}
4641
4642/// Val is the undef sentinel value or equal to the specified value.
4643static bool isUndefOrEqual(int Val, int CmpVal) {
4644 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4645}
4646
4647/// Val is either the undef or zero sentinel value.
4648static bool isUndefOrZero(int Val) {
4649 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4650}
4651
4652/// Return true if every element in Mask, beginning
4653/// from position Pos and ending in Pos+Size is the undef sentinel value.
4654static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4655 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4656 if (Mask[i] != SM_SentinelUndef)
4657 return false;
4658 return true;
4659}
4660
4661/// Return true if Val is undef or if its value falls within the
4662/// specified range (L, H].
4663static bool isUndefOrInRange(int Val, int Low, int Hi) {
4664 return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
4665}
4666
4667/// Return true if every element in Mask is undef or if its value
4668/// falls within the specified range (L, H].
4669static bool isUndefOrInRange(ArrayRef<int> Mask,
4670 int Low, int Hi) {
4671 for (int M : Mask)
4672 if (!isUndefOrInRange(M, Low, Hi))
4673 return false;
4674 return true;
4675}
4676
4677/// Return true if Val is undef, zero or if its value falls within the
4678/// specified range (L, H].
4679static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4680 return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
4681}
4682
4683/// Return true if every element in Mask is undef, zero or if its value
4684/// falls within the specified range (L, H].
4685static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4686 for (int M : Mask)
4687 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4688 return false;
4689 return true;
4690}
4691
4692/// Return true if every element in Mask, beginning
4693/// from position Pos and ending in Pos+Size, falls within the specified
4694/// sequential range (Low, Low+Size]. or is undef.
4695static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
4696 unsigned Pos, unsigned Size, int Low) {
4697 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
4698 if (!isUndefOrEqual(Mask[i], Low))
4699 return false;
4700 return true;
4701}
4702
4703/// Return true if every element in Mask, beginning
4704/// from position Pos and ending in Pos+Size, falls within the specified
4705/// sequential range (Low, Low+Size], or is undef or is zero.
4706static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4707 unsigned Size, int Low) {
4708 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4709 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4710 return false;
4711 return true;
4712}
4713
4714/// Return true if every element in Mask, beginning
4715/// from position Pos and ending in Pos+Size is undef or is zero.
4716static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4717 unsigned Size) {
4718 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4719 if (!isUndefOrZero(Mask[i]))
4720 return false;
4721 return true;
4722}
4723
4724/// \brief Helper function to test whether a shuffle mask could be
4725/// simplified by widening the elements being shuffled.
4726///
4727/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
4728/// leaves it in an unspecified state.
4729///
4730/// NOTE: This must handle normal vector shuffle masks and *target* vector
4731/// shuffle masks. The latter have the special property of a '-2' representing
4732/// a zero-ed lane of a vector.
4733static bool canWidenShuffleElements(ArrayRef<int> Mask,
4734 SmallVectorImpl<int> &WidenedMask) {
4735 WidenedMask.assign(Mask.size() / 2, 0);
4736 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4737 // If both elements are undef, its trivial.
4738 if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
4739 WidenedMask[i / 2] = SM_SentinelUndef;
4740 continue;
4741 }
4742
4743 // Check for an undef mask and a mask value properly aligned to fit with
4744 // a pair of values. If we find such a case, use the non-undef mask's value.
4745 if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 &&
4746 Mask[i + 1] % 2 == 1) {
4747 WidenedMask[i / 2] = Mask[i + 1] / 2;
4748 continue;
4749 }
4750 if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
4751 WidenedMask[i / 2] = Mask[i] / 2;
4752 continue;
4753 }
4754
4755 // When zeroing, we need to spread the zeroing across both lanes to widen.
4756 if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
4757 if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
4758 (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
4759 WidenedMask[i / 2] = SM_SentinelZero;
4760 continue;
4761 }
4762 return false;
4763 }
4764
4765 // Finally check if the two mask values are adjacent and aligned with
4766 // a pair.
4767 if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 &&
4768 Mask[i] + 1 == Mask[i + 1]) {
4769 WidenedMask[i / 2] = Mask[i] / 2;
4770 continue;
4771 }
4772
4773 // Otherwise we can't safely widen the elements used in this shuffle.
4774 return false;
4775 }
4776 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4777, __PRETTY_FUNCTION__))
4777 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4777, __PRETTY_FUNCTION__))
;
4778
4779 return true;
4780}
4781
4782/// Helper function to scale a shuffle or target shuffle mask, replacing each
4783/// mask index with the scaled sequential indices for an equivalent narrowed
4784/// mask. This is the reverse process to canWidenShuffleElements, but can always
4785/// succeed.
4786static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
4787 SmallVectorImpl<int> &ScaledMask) {
4788 assert(0 < Scale && "Unexpected scaling factor")((0 < Scale && "Unexpected scaling factor") ? static_cast
<void> (0) : __assert_fail ("0 < Scale && \"Unexpected scaling factor\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4788, __PRETTY_FUNCTION__))
;
4789 int NumElts = Mask.size();
4790 ScaledMask.assign(NumElts * Scale, -1);
4791
4792 for (int i = 0; i != NumElts; ++i) {
4793 int M = Mask[i];
4794
4795 // Repeat sentinel values in every mask element.
4796 if (M < 0) {
4797 for (int s = 0; s != Scale; ++s)
4798 ScaledMask[(Scale * i) + s] = M;
4799 continue;
4800 }
4801
4802 // Scale mask element and increment across each mask element.
4803 for (int s = 0; s != Scale; ++s)
4804 ScaledMask[(Scale * i) + s] = (Scale * M) + s;
4805 }
4806}
4807
4808/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
4809/// extract that is suitable for instruction that extract 128 or 256 bit vectors
4810static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
4811 assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width")(((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unexpected vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4811, __PRETTY_FUNCTION__))
;
4812 if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
4813 return false;
4814
4815 // The index should be aligned on a vecWidth-bit boundary.
4816 uint64_t Index =
4817 cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
4818
4819 MVT VT = N->getSimpleValueType(0);
4820 unsigned ElSize = VT.getScalarSizeInBits();
4821 bool Result = (Index * ElSize) % vecWidth == 0;
4822
4823 return Result;
4824}
4825
4826/// Return true if the specified INSERT_SUBVECTOR
4827/// operand specifies a subvector insert that is suitable for input to
4828/// insertion of 128 or 256-bit subvectors
4829static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
4830 assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width")(((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unexpected vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4830, __PRETTY_FUNCTION__))
;
4831 if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
4832 return false;
4833 // The index should be aligned on a vecWidth-bit boundary.
4834 uint64_t Index =
4835 cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
4836
4837 MVT VT = N->getSimpleValueType(0);
4838 unsigned ElSize = VT.getScalarSizeInBits();
4839 bool Result = (Index * ElSize) % vecWidth == 0;
4840
4841 return Result;
4842}
4843
4844bool X86::isVINSERT128Index(SDNode *N) {
4845 return isVINSERTIndex(N, 128);
4846}
4847
4848bool X86::isVINSERT256Index(SDNode *N) {
4849 return isVINSERTIndex(N, 256);
4850}
4851
4852bool X86::isVEXTRACT128Index(SDNode *N) {
4853 return isVEXTRACTIndex(N, 128);
4854}
4855
4856bool X86::isVEXTRACT256Index(SDNode *N) {
4857 return isVEXTRACTIndex(N, 256);
4858}
4859
4860static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
4861 assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width")(((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4861, __PRETTY_FUNCTION__))
;
4862 assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&((isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
"Illegal extract subvector for VEXTRACT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(1).getNode()) && \"Illegal extract subvector for VEXTRACT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4863, __PRETTY_FUNCTION__))
4863 "Illegal extract subvector for VEXTRACT")((isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
"Illegal extract subvector for VEXTRACT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(1).getNode()) && \"Illegal extract subvector for VEXTRACT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4863, __PRETTY_FUNCTION__))
;
4864
4865 uint64_t Index =
4866 cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
4867
4868 MVT VecVT = N->getOperand(0).getSimpleValueType();
4869 MVT ElVT = VecVT.getVectorElementType();
4870
4871 unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
4872 return Index / NumElemsPerChunk;
4873}
4874
4875static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
4876 assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width")(((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4876, __PRETTY_FUNCTION__))
;
4877 assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&((isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
"Illegal insert subvector for VINSERT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2).getNode()) && \"Illegal insert subvector for VINSERT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4878, __PRETTY_FUNCTION__))
4878 "Illegal insert subvector for VINSERT")((isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
"Illegal insert subvector for VINSERT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2).getNode()) && \"Illegal insert subvector for VINSERT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4878, __PRETTY_FUNCTION__))
;
4879
4880 uint64_t Index =
4881 cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
4882
4883 MVT VecVT = N->getSimpleValueType(0);
4884 MVT ElVT = VecVT.getVectorElementType();
4885
4886 unsigned NumElemsPerChunk = vecWidth / ElVT.getSizeInBits();
4887 return Index / NumElemsPerChunk;
4888}
4889
4890/// Return the appropriate immediate to extract the specified
4891/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VINSERTI128 instructions.
4892unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
4893 return getExtractVEXTRACTImmediate(N, 128);
4894}
4895
4896/// Return the appropriate immediate to extract the specified
4897/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VINSERTI64x4 instructions.
4898unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
4899 return getExtractVEXTRACTImmediate(N, 256);
4900}
4901
4902/// Return the appropriate immediate to insert at the specified
4903/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
4904unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
4905 return getInsertVINSERTImmediate(N, 128);
4906}
4907
4908/// Return the appropriate immediate to insert at the specified
4909/// INSERT_SUBVECTOR index with VINSERTF46x4 and VINSERTI64x4 instructions.
4910unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
4911 return getInsertVINSERTImmediate(N, 256);
4912}
4913
4914/// Returns true if Elt is a constant zero or a floating point constant +0.0.
4915bool X86::isZeroNode(SDValue Elt) {
4916 return isNullConstant(Elt) || isNullFPConstant(Elt);
4917}
4918
4919// Build a vector of constants
4920// Use an UNDEF node if MaskElt == -1.
4921// Spilt 64-bit constants in the 32-bit mode.
4922static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
4923 const SDLoc &dl, bool IsMask = false) {
4924
4925 SmallVector<SDValue, 32> Ops;
4926 bool Split = false;
4927
4928 MVT ConstVecVT = VT;
4929 unsigned NumElts = VT.getVectorNumElements();
4930 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4931 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4932 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4933 Split = true;
4934 }
4935
4936 MVT EltVT = ConstVecVT.getVectorElementType();
4937 for (unsigned i = 0; i < NumElts; ++i) {
4938 bool IsUndef = Values[i] < 0 && IsMask;
4939 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
4940 DAG.getConstant(Values[i], dl, EltVT);
4941 Ops.push_back(OpNode);
4942 if (Split)
4943 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
4944 DAG.getConstant(0, dl, EltVT));
4945 }
4946 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4947 if (Split)
4948 ConstsNode = DAG.getBitcast(VT, ConstsNode);
4949 return ConstsNode;
4950}
4951
4952static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
4953 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4954 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4955, __PRETTY_FUNCTION__))
4955 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4955, __PRETTY_FUNCTION__))
;
4956 SmallVector<SDValue, 32> Ops;
4957 bool Split = false;
4958
4959 MVT ConstVecVT = VT;
4960 unsigned NumElts = VT.getVectorNumElements();
4961 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4962 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4963 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4964 Split = true;
4965 }
4966
4967 MVT EltVT = ConstVecVT.getVectorElementType();
4968 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4969 if (Undefs[i]) {
4970 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
4971 continue;
4972 }
4973 const APInt &V = Bits[i];
4974 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4974, __PRETTY_FUNCTION__))
;
4975 if (Split) {
4976 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
4977 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
4978 } else if (EltVT == MVT::f32) {
4979 APFloat FV(APFloat::IEEEsingle(), V);
4980 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4981 } else if (EltVT == MVT::f64) {
4982 APFloat FV(APFloat::IEEEdouble(), V);
4983 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4984 } else {
4985 Ops.push_back(DAG.getConstant(V, dl, EltVT));
4986 }
4987 }
4988
4989 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4990 return DAG.getBitcast(VT, ConstsNode);
4991}
4992
4993/// Returns a vector of specified type with all zero elements.
4994static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
4995 SelectionDAG &DAG, const SDLoc &dl) {
4996 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4998, __PRETTY_FUNCTION__))
4997 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4998, __PRETTY_FUNCTION__))
4998 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 4998, __PRETTY_FUNCTION__))
;
4999
5000 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5001 // type. This ensures they get CSE'd. But if the integer type is not
5002 // available, use a floating-point +0.0 instead.
5003 SDValue Vec;
5004 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5005 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5006 } else if (VT.getVectorElementType() == MVT::i1) {
5007 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5008, __PRETTY_FUNCTION__))
5008 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5008, __PRETTY_FUNCTION__))
;
5009 assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&(((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5010, __PRETTY_FUNCTION__))
5010 "Unexpected vector type")(((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5010, __PRETTY_FUNCTION__))
;
5011 Vec = DAG.getConstant(0, dl, VT);
5012 } else {
5013 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5014 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5015 }
5016 return DAG.getBitcast(VT, Vec);
5017}
5018
5019static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5020 const SDLoc &dl, unsigned vectorWidth) {
5021 EVT VT = Vec.getValueType();
5022 EVT ElVT = VT.getVectorElementType();
5023 unsigned Factor = VT.getSizeInBits()/vectorWidth;
5024 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5025 VT.getVectorNumElements()/Factor);
5026
5027 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5028 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5029 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5029, __PRETTY_FUNCTION__))
;
5030
5031 // This is the index of the first element of the vectorWidth-bit chunk
5032 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5033 IdxVal &= ~(ElemsPerChunk - 1);
5034
5035 // If the input is a buildvector just emit a smaller one.
5036 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5037 return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
5038 makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
5039
5040 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5041 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5042}
5043
5044/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5045/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5046/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5047/// instructions or a simple subregister reference. Idx is an index in the
5048/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5049/// lowering EXTRACT_VECTOR_ELT operations easier.
5050static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5051 SelectionDAG &DAG, const SDLoc &dl) {
5052 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5053, __PRETTY_FUNCTION__))
5053 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5053, __PRETTY_FUNCTION__))
;
5054 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5055}
5056
5057/// Generate a DAG to grab 256-bits from a 512-bit vector.
5058static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5059 SelectionDAG &DAG, const SDLoc &dl) {
5060 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5060, __PRETTY_FUNCTION__))
;
5061 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5062}
5063
5064static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5065 SelectionDAG &DAG, const SDLoc &dl,
5066 unsigned vectorWidth) {
5067 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5068, __PRETTY_FUNCTION__))
5068 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5068, __PRETTY_FUNCTION__))
;
5069 // Inserting UNDEF is Result
5070 if (Vec.isUndef())
5071 return Result;
5072 EVT VT = Vec.getValueType();
5073 EVT ElVT = VT.getVectorElementType();
5074 EVT ResultVT = Result.getValueType();
5075
5076 // Insert the relevant vectorWidth bits.
5077 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5078 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5078, __PRETTY_FUNCTION__))
;
5079
5080 // This is the index of the first element of the vectorWidth-bit chunk
5081 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5082 IdxVal &= ~(ElemsPerChunk - 1);
5083
5084 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5085 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5086}
5087
5088/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5089/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5090/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5091/// simple superregister reference. Idx is an index in the 128 bits
5092/// we want. It need not be aligned to a 128-bit boundary. That makes
5093/// lowering INSERT_VECTOR_ELT operations easier.
5094static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5095 SelectionDAG &DAG, const SDLoc &dl) {
5096 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5096, __PRETTY_FUNCTION__))
;
5097 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5098}
5099
5100static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5101 SelectionDAG &DAG, const SDLoc &dl) {
5102 assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!")((Vec.getValueType().is256BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is256BitVector() && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5102, __PRETTY_FUNCTION__))
;
5103 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
5104}
5105
5106/// Insert i1-subvector to i1-vector.
5107static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5108 const X86Subtarget &Subtarget) {
5109
5110 SDLoc dl(Op);
5111 SDValue Vec = Op.getOperand(0);
5112 SDValue SubVec = Op.getOperand(1);
5113 SDValue Idx = Op.getOperand(2);
5114
5115 if (!isa<ConstantSDNode>(Idx))
5116 return SDValue();
5117
5118 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5119 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5120 return Op;
5121
5122 MVT OpVT = Op.getSimpleValueType();
5123 MVT SubVecVT = SubVec.getSimpleValueType();
5124 unsigned NumElems = OpVT.getVectorNumElements();
5125 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5126
5127 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5129, __PRETTY_FUNCTION__))
5128 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5129, __PRETTY_FUNCTION__))
5129 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5129, __PRETTY_FUNCTION__))
;
5130
5131 // There are 3 possible cases:
5132 // 1. Subvector should be inserted in the lower part (IdxVal == 0)
5133 // 2. Subvector should be inserted in the upper part
5134 // (IdxVal + SubVecNumElems == NumElems)
5135 // 3. Subvector should be inserted in the middle (for example v2i1
5136 // to v16i1, index 2)
5137
5138 // extend to natively supported kshift
5139 MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5140 MVT WideOpVT = OpVT;
5141 if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
5142 WideOpVT = MinVT;
5143
5144 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5145 SDValue Undef = DAG.getUNDEF(WideOpVT);
5146 SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5147 Undef, SubVec, ZeroIdx);
5148
5149 // Extract sub-vector if require.
5150 auto ExtractSubVec = [&](SDValue V) {
5151 return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
5152 OpVT, V, ZeroIdx);
5153 };
5154
5155 if (Vec.isUndef()) {
5156 if (IdxVal != 0) {
5157 SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
5158 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5159 ShiftBits);
5160 }
5161 return ExtractSubVec(WideSubVec);
5162 }
5163
5164 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5165 NumElems = WideOpVT.getVectorNumElements();
5166 unsigned ShiftLeft = NumElems - SubVecNumElems;
5167 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5168 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5169 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5170 Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5171 DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
5172 return ExtractSubVec(Vec);
5173 }
5174
5175 if (IdxVal == 0) {
5176 // Zero lower bits of the Vec
5177 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5178 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5179 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5180 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5181 // Merge them together, SubVec should be zero extended.
5182 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5183 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5184 SubVec, ZeroIdx);
5185 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5186 return ExtractSubVec(Vec);
5187 }
5188
5189 // Simple case when we put subvector in the upper part
5190 if (IdxVal + SubVecNumElems == NumElems) {
5191 // Zero upper bits of the Vec
5192 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5193 DAG.getConstant(IdxVal, dl, MVT::i8));
5194 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5195 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5196 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5197 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5198 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5199 return ExtractSubVec(Vec);
5200 }
5201 // Subvector should be inserted in the middle - use shuffle
5202 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
5203 SubVec, ZeroIdx);
5204 SmallVector<int, 64> Mask;
5205 for (unsigned i = 0; i < NumElems; ++i)
5206 Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
5207 i : i + NumElems);
5208 return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
5209}
5210
5211/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
5212/// instructions. This is used because creating CONCAT_VECTOR nodes of
5213/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
5214/// large BUILD_VECTORS.
5215static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
5216 unsigned NumElems, SelectionDAG &DAG,
5217 const SDLoc &dl) {
5218 SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5219 return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
5220}
5221
5222static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
5223 unsigned NumElems, SelectionDAG &DAG,
5224 const SDLoc &dl) {
5225 SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5226 return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
5227}
5228
5229/// Returns a vector of specified type with all bits set.
5230/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5231/// Then bitcast to their original type, ensuring they get CSE'd.
5232static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
5233 SelectionDAG &DAG, const SDLoc &dl) {
5234 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5235, __PRETTY_FUNCTION__))
5235 "Expected a 128/256/512-bit vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5235, __PRETTY_FUNCTION__))
;
5236
5237 APInt Ones = APInt::getAllOnesValue(32);
5238 unsigned NumElts = VT.getSizeInBits() / 32;
5239 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5240 return DAG.getBitcast(VT, Vec);
5241}
5242
5243static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5244 SelectionDAG &DAG) {
5245 EVT InVT = In.getValueType();
5246 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode")(((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("(X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && \"Unexpected opcode\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5246, __PRETTY_FUNCTION__))
;
5247
5248 if (VT.is128BitVector() && InVT.is128BitVector())
5249 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5250 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5251
5252 // For 256-bit vectors, we only need the lower (128-bit) input half.
5253 // For 512-bit vectors, we only need the lower input half or quarter.
5254 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
5255 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5256 In = extractSubVector(In, 0, DAG, DL,
5257 std::max(128, (int)VT.getSizeInBits() / Scale));
5258 }
5259
5260 return DAG.getNode(Opc, DL, VT, In);
5261}
5262
5263/// Generate unpacklo/unpackhi shuffle mask.
5264static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
5265 bool Unary) {
5266 assert(Mask.empty() && "Expected an empty shuffle mask vector")((Mask.empty() && "Expected an empty shuffle mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"Expected an empty shuffle mask vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5266, __PRETTY_FUNCTION__))
;
5267 int NumElts = VT.getVectorNumElements();
5268 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
5269
5270 for (int i = 0; i < NumElts; ++i) {
5271 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
5272 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
5273 Pos += (Unary ? 0 : NumElts * (i % 2));
5274 Pos += (Lo ? 0 : NumEltsInLane / 2);
5275 Mask.push_back(Pos);
5276 }
5277}
5278
5279/// Returns a vector_shuffle node for an unpackl operation.
5280static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5281 SDValue V1, SDValue V2) {
5282 SmallVector<int, 8> Mask;
5283 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5284 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5285}
5286
5287/// Returns a vector_shuffle node for an unpackh operation.
5288static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5289 SDValue V1, SDValue V2) {
5290 SmallVector<int, 8> Mask;
5291 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5292 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5293}
5294
5295/// Return a vector_shuffle of the specified vector of zero or undef vector.
5296/// This produces a shuffle where the low element of V2 is swizzled into the
5297/// zero/undef vector, landing at element Idx.
5298/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5299static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5300 bool IsZero,
5301 const X86Subtarget &Subtarget,
5302 SelectionDAG &DAG) {
5303 MVT VT = V2.getSimpleValueType();
5304 SDValue V1 = IsZero
5305 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5306 int NumElems = VT.getVectorNumElements();
5307 SmallVector<int, 16> MaskVec(NumElems);
5308 for (int i = 0; i != NumElems; ++i)
5309 // If this is the insertion idx, put the low elt of V2 here.
5310 MaskVec[i] = (i == Idx) ? NumElems : i;
5311 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5312}
5313
5314static SDValue peekThroughBitcasts(SDValue V) {
5315 while (V.getNode() && V.getOpcode() == ISD::BITCAST)
5316 V = V.getOperand(0);
5317 return V;
5318}
5319
5320static SDValue peekThroughOneUseBitcasts(SDValue V) {
5321 while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
5322 V.getOperand(0).hasOneUse())
5323 V = V.getOperand(0);
5324 return V;
5325}
5326
5327static const Constant *getTargetConstantFromNode(SDValue Op) {
5328 Op = peekThroughBitcasts(Op);
5329
5330 auto *Load = dyn_cast<LoadSDNode>(Op);
5331 if (!Load)
5332 return nullptr;
5333
5334 SDValue Ptr = Load->getBasePtr();
5335 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5336 Ptr->getOpcode() == X86ISD::WrapperRIP)
5337 Ptr = Ptr->getOperand(0);
5338
5339 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5340 if (!CNode || CNode->isMachineConstantPoolEntry())
5341 return nullptr;
5342
5343 return dyn_cast<Constant>(CNode->getConstVal());
5344}
5345
5346// Extract raw constant bits from constant pools.
5347static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5348 APInt &UndefElts,
5349 SmallVectorImpl<APInt> &EltBits,
5350 bool AllowWholeUndefs = true,
5351 bool AllowPartialUndefs = true) {
5352 assert(EltBits.empty() && "Expected an empty EltBits vector")((EltBits.empty() && "Expected an empty EltBits vector"
) ? static_cast<void> (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5352, __PRETTY_FUNCTION__))
;
5353
5354 Op = peekThroughBitcasts(Op);
5355
5356 EVT VT = Op.getValueType();
5357 unsigned SizeInBits = VT.getSizeInBits();
5358 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"
) ? static_cast<void> (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5358, __PRETTY_FUNCTION__))
;
5359 unsigned NumElts = SizeInBits / EltSizeInBits;
5360
5361 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5362 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5363
5364 // Extract all the undef/constant element data and pack into single bitsets.
5365 APInt UndefBits(SizeInBits, 0);
5366 APInt MaskBits(SizeInBits, 0);
5367
5368 // Split the undef/constant single bitset data into the target elements.
5369 auto SplitBitData = [&]() {
5370 // Don't split if we don't allow undef bits.
5371 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5372 if (UndefBits.getBoolValue() && !AllowUndefs)
5373 return false;
5374
5375 UndefElts = APInt(NumElts, 0);
5376 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5377
5378 for (unsigned i = 0; i != NumElts; ++i) {
5379 unsigned BitOffset = i * EltSizeInBits;
5380 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5381
5382 // Only treat an element as UNDEF if all bits are UNDEF.
5383 if (UndefEltBits.isAllOnesValue()) {
5384 if (!AllowWholeUndefs)
5385 return false;
5386 UndefElts.setBit(i);
5387 continue;
5388 }
5389
5390 // If only some bits are UNDEF then treat them as zero (or bail if not
5391 // supported).
5392 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5393 return false;
5394
5395 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5396 EltBits[i] = Bits.getZExtValue();
5397 }
5398 return true;
5399 };
5400
5401 // Collect constant bits and insert into mask/undef bit masks.
5402 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5403 unsigned BitOffset) {
5404 if (!Cst)
5405 return false;
5406 unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
5407 if (isa<UndefValue>(Cst)) {
5408 Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
5409 return true;
5410 }
5411 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5412 Mask.insertBits(CInt->getValue(), BitOffset);
5413 return true;
5414 }
5415 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5416 Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset);
5417 return true;
5418 }
5419 return false;
5420 };
5421
5422 // Extract constant bits from build vector.
5423 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5424 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5425 const SDValue &Src = Op.getOperand(i);
5426 unsigned BitOffset = i * SrcEltSizeInBits;
5427 if (Src.isUndef()) {
5428 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5429 continue;
5430 }
5431 auto *Cst = cast<ConstantSDNode>(Src);
5432 APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5433 MaskBits.insertBits(Bits, BitOffset);
5434 }
5435 return SplitBitData();
5436 }
5437
5438 // Extract constant bits from constant pool vector.
5439 if (auto *Cst = getTargetConstantFromNode(Op)) {
5440 Type *CstTy = Cst->getType();
5441 if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
5442 return false;
5443
5444 unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
5445 for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i)
5446 if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits,
5447 i * CstEltSizeInBits))
5448 return false;
5449
5450 return SplitBitData();
5451 }
5452
5453 // Extract constant bits from a broadcasted constant pool scalar.
5454 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5455 EltSizeInBits <= SrcEltSizeInBits) {
5456 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5457 APInt Bits(SizeInBits, 0);
5458 APInt Undefs(SizeInBits, 0);
5459 if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) {
5460 for (unsigned i = 0; i != NumSrcElts; ++i) {
5461 MaskBits |= Bits.shl(i * SrcEltSizeInBits);
5462 UndefBits |= Undefs.shl(i * SrcEltSizeInBits);
5463 }
5464 return SplitBitData();
5465 }
5466 }
5467 }
5468
5469 // Extract a rematerialized scalar constant insertion.
5470 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5471 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5472 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5473 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5474 MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5475 MaskBits = MaskBits.zext(SizeInBits);
5476 return SplitBitData();
5477 }
5478
5479 return false;
5480}
5481
5482static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5483 unsigned MaskEltSizeInBits,
5484 SmallVectorImpl<uint64_t> &RawMask) {
5485 APInt UndefElts;
5486 SmallVector<APInt, 64> EltBits;
5487
5488 // Extract the raw target constant bits.
5489 // FIXME: We currently don't support UNDEF bits or mask entries.
5490 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5491 EltBits, /* AllowWholeUndefs */ false,
5492 /* AllowPartialUndefs */ false))
5493 return false;
5494
5495 // Insert the extracted elements into the mask.
5496 for (APInt Elt : EltBits)
5497 RawMask.push_back(Elt.getZExtValue());
5498
5499 return true;
5500}
5501
5502/// Calculates the shuffle mask corresponding to the target-specific opcode.
5503/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5504/// operands in \p Ops, and returns true.
5505/// Sets \p IsUnary to true if only one source is used. Note that this will set
5506/// IsUnary for shuffles which use a single input multiple times, and in those
5507/// cases it will adjust the mask to only have indices within that single input.
5508/// It is an error to call this with non-empty Mask/Ops vectors.
5509static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5510 SmallVectorImpl<SDValue> &Ops,
5511 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5512 unsigned NumElems = VT.getVectorNumElements();
5513 SDValue ImmN;
5514
5515 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector")((Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"getTargetShuffleMask expects an empty Mask vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5515, __PRETTY_FUNCTION__))
;
5516 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector")((Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"
) ? static_cast<void> (0) : __assert_fail ("Ops.empty() && \"getTargetShuffleMask expects an empty Ops vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5516, __PRETTY_FUNCTION__))
;
5517
5518 IsUnary = false;
5519 bool IsFakeUnary = false;
5520 switch(N->getOpcode()) {
5521 case X86ISD::BLENDI:
5522 ImmN = N->getOperand(N->getNumOperands()-1);
5523 DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5524 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5525 break;
5526 case X86ISD::SHUFP:
5527 ImmN = N->getOperand(N->getNumOperands()-1);
5528 DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5529 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5530 break;
5531 case X86ISD::INSERTPS:
5532 ImmN = N->getOperand(N->getNumOperands()-1);
5533 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5534 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5535 break;
5536 case X86ISD::UNPCKH:
5537 DecodeUNPCKHMask(VT, Mask);
5538 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5539 break;
5540 case X86ISD::UNPCKL:
5541 DecodeUNPCKLMask(VT, Mask);
5542 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5543 break;
5544 case X86ISD::MOVHLPS:
5545 DecodeMOVHLPSMask(NumElems, Mask);
5546 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5547 break;
5548 case X86ISD::MOVLHPS:
5549 DecodeMOVLHPSMask(NumElems, Mask);
5550 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5551 break;
5552 case X86ISD::PALIGNR:
5553 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5553, __PRETTY_FUNCTION__))
;
5554 ImmN = N->getOperand(N->getNumOperands()-1);
5555 DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5556 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5557 Ops.push_back(N->getOperand(1));
5558 Ops.push_back(N->getOperand(0));
5559 break;
5560 case X86ISD::VSHLDQ:
5561 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5561, __PRETTY_FUNCTION__))
;
5562 ImmN = N->getOperand(N->getNumOperands() - 1);
5563 DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5564 IsUnary = true;
5565 break;
5566 case X86ISD::VSRLDQ:
5567 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5567, __PRETTY_FUNCTION__))
;
5568 ImmN = N->getOperand(N->getNumOperands() - 1);
5569 DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5570 IsUnary = true;
5571 break;
5572 case X86ISD::PSHUFD:
5573 case X86ISD::VPERMILPI:
5574 ImmN = N->getOperand(N->getNumOperands()-1);
5575 DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5576 IsUnary = true;
5577 break;
5578 case X86ISD::PSHUFHW:
5579 ImmN = N->getOperand(N->getNumOperands()-1);
5580 DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5581 IsUnary = true;
5582 break;
5583 case X86ISD::PSHUFLW:
5584 ImmN = N->getOperand(N->getNumOperands()-1);
5585 DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5586 IsUnary = true;
5587 break;
5588 case X86ISD::VZEXT_MOVL:
5589 DecodeZeroMoveLowMask(VT, Mask);
5590 IsUnary = true;
5591 break;
5592 case X86ISD::VBROADCAST: {
5593 SDValue N0 = N->getOperand(0);
5594 // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
5595 // add the pre-extracted value to the Ops vector.
5596 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5597 N0.getOperand(0).getValueType() == VT &&
5598 N0.getConstantOperandVal(1) == 0)
5599 Ops.push_back(N0.getOperand(0));
5600
5601 // We only decode broadcasts of same-sized vectors, unless the broadcast
5602 // came from an extract from the original width. If we found one, we
5603 // pushed it the Ops vector above.
5604 if (N0.getValueType() == VT || !Ops.empty()) {
5605 DecodeVectorBroadcast(VT, Mask);
5606 IsUnary = true;
5607 break;
5608 }
5609 return false;
5610 }
5611 case X86ISD::VPERMILPV: {
5612 IsUnary = true;
5613 SDValue MaskNode = N->getOperand(1);
5614 unsigned MaskEltSize = VT.getScalarSizeInBits();
5615 SmallVector<uint64_t, 32> RawMask;
5616 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5617 DecodeVPERMILPMask(VT, RawMask, Mask);
5618 break;
5619 }
5620 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5621 DecodeVPERMILPMask(C, MaskEltSize, Mask);
5622 break;
5623 }
5624 return false;
5625 }
5626 case X86ISD::PSHUFB: {
5627 IsUnary = true;
5628 SDValue MaskNode = N->getOperand(1);
5629 SmallVector<uint64_t, 32> RawMask;
5630 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5631 DecodePSHUFBMask(RawMask, Mask);
5632 break;
5633 }
5634 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5635 DecodePSHUFBMask(C, Mask);
5636 break;
5637 }
5638 return false;
5639 }
5640 case X86ISD::VPERMI:
5641 ImmN = N->getOperand(N->getNumOperands()-1);
5642 DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5643 IsUnary = true;
5644 break;
5645 case X86ISD::MOVSS:
5646 case X86ISD::MOVSD:
5647 DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
5648 break;
5649 case X86ISD::VPERM2X128:
5650 ImmN = N->getOperand(N->getNumOperands()-1);
5651 DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5652 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5653 break;
5654 case X86ISD::MOVSLDUP:
5655 DecodeMOVSLDUPMask(VT, Mask);
5656 IsUnary = true;
5657 break;
5658 case X86ISD::MOVSHDUP:
5659 DecodeMOVSHDUPMask(VT, Mask);
5660 IsUnary = true;
5661 break;
5662 case X86ISD::MOVDDUP:
5663 DecodeMOVDDUPMask(VT, Mask);
5664 IsUnary = true;
5665 break;
5666 case X86ISD::MOVLHPD:
5667 case X86ISD::MOVLPD:
5668 case X86ISD::MOVLPS:
5669 // Not yet implemented
5670 return false;
5671 case X86ISD::VPERMIL2: {
5672 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5673 unsigned MaskEltSize = VT.getScalarSizeInBits();
5674 SDValue MaskNode = N->getOperand(2);
5675 SDValue CtrlNode = N->getOperand(3);
5676 if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
5677 unsigned CtrlImm = CtrlOp->getZExtValue();
5678 SmallVector<uint64_t, 32> RawMask;
5679 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5680 DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
5681 break;
5682 }
5683 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5684 DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
5685 break;
5686 }
5687 }
5688 return false;
5689 }
5690 case X86ISD::VPPERM: {
5691 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5692 SDValue MaskNode = N->getOperand(2);
5693 SmallVector<uint64_t, 32> RawMask;
5694 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5695 DecodeVPPERMMask(RawMask, Mask);
5696 break;
5697 }
5698 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5699 DecodeVPPERMMask(C, Mask);
5700 break;
5701 }
5702 return false;
5703 }
5704 case X86ISD::VPERMV: {
5705 IsUnary = true;
5706 // Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
5707 Ops.push_back(N->getOperand(1));
5708 SDValue MaskNode = N->getOperand(0);
5709 SmallVector<uint64_t, 32> RawMask;
5710 unsigned MaskEltSize = VT.getScalarSizeInBits();
5711 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5712 DecodeVPERMVMask(RawMask, Mask);
5713 break;
5714 }
5715 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5716 DecodeVPERMVMask(C, MaskEltSize, Mask);
5717 break;
5718 }
5719 return false;
5720 }
5721 case X86ISD::VPERMV3: {
5722 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
5723 // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
5724 Ops.push_back(N->getOperand(0));
5725 Ops.push_back(N->getOperand(2));
5726 SDValue MaskNode = N->getOperand(1);
5727 unsigned MaskEltSize = VT.getScalarSizeInBits();
5728 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5729 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5730 break;
5731 }
5732 return false;
5733 }
5734 case X86ISD::VPERMIV3: {
5735 IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
5736 // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
5737 Ops.push_back(N->getOperand(1));
5738 Ops.push_back(N->getOperand(2));
5739 SDValue MaskNode = N->getOperand(0);
5740 unsigned MaskEltSize = VT.getScalarSizeInBits();
5741 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5742 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5743 break;
5744 }
5745 return false;
5746 }
5747 default: llvm_unreachable("unknown target shuffle node")::llvm::llvm_unreachable_internal("unknown target shuffle node"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5747)
;
5748 }
5749
5750 // Empty mask indicates the decode failed.
5751 if (Mask.empty())
5752 return false;
5753
5754 // Check if we're getting a shuffle mask with zero'd elements.
5755 if (!AllowSentinelZero)
5756 if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
5757 return false;
5758
5759 // If we have a fake unary shuffle, the shuffle mask is spread across two
5760 // inputs that are actually the same node. Re-map the mask to always point
5761 // into the first input.
5762 if (IsFakeUnary)
5763 for (int &M : Mask)
5764 if (M >= (int)Mask.size())
5765 M -= Mask.size();
5766
5767 // If we didn't already add operands in the opcode-specific code, default to
5768 // adding 1 or 2 operands starting at 0.
5769 if (Ops.empty()) {
5770 Ops.push_back(N->getOperand(0));
5771 if (!IsUnary || IsFakeUnary)
5772 Ops.push_back(N->getOperand(1));
5773 }
5774
5775 return true;
5776}
5777
5778/// Check a target shuffle mask's inputs to see if we can set any values to
5779/// SM_SentinelZero - this is for elements that are known to be zero
5780/// (not just zeroable) from their inputs.
5781/// Returns true if the target shuffle mask was decoded.
5782static bool setTargetShuffleZeroElements(SDValue N,
5783 SmallVectorImpl<int> &Mask,
5784 SmallVectorImpl<SDValue> &Ops) {
5785 bool IsUnary;
5786 if (!isTargetShuffle(N.getOpcode()))
5787 return false;
5788
5789 MVT VT = N.getSimpleValueType();
5790 if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
5791 return false;
5792
5793 SDValue V1 = Ops[0];
5794 SDValue V2 = IsUnary ? V1 : Ops[1];
5795
5796 V1 = peekThroughBitcasts(V1);
5797 V2 = peekThroughBitcasts(V2);
5798
5799 assert((VT.getSizeInBits() % Mask.size()) == 0 &&(((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5800, __PRETTY_FUNCTION__))
5800 "Illegal split of shuffle value type")(((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5800, __PRETTY_FUNCTION__))
;
5801 unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
5802
5803 // Extract known constant input data.
5804 APInt UndefSrcElts[2];
5805 SmallVector<APInt, 32> SrcEltBits[2];
5806 bool IsSrcConstant[2] = {
5807 getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
5808 SrcEltBits[0], true, false),
5809 getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
5810 SrcEltBits[1], true, false)};
5811
5812 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
5813 int M = Mask[i];
5814
5815 // Already decoded as SM_SentinelZero / SM_SentinelUndef.
5816 if (M < 0)
5817 continue;
5818
5819 // Determine shuffle input and normalize the mask.
5820 unsigned SrcIdx = M / Size;
5821 SDValue V = M < Size ? V1 : V2;
5822 M %= Size;
5823
5824 // We are referencing an UNDEF input.
5825 if (V.isUndef()) {
5826 Mask[i] = SM_SentinelUndef;
5827 continue;
5828 }
5829
5830 // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
5831 // TODO: We currently only set UNDEF for integer types - floats use the same
5832 // registers as vectors and many of the scalar folded loads rely on the
5833 // SCALAR_TO_VECTOR pattern.
5834 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5835 (Size % V.getValueType().getVectorNumElements()) == 0) {
5836 int Scale = Size / V.getValueType().getVectorNumElements();
5837 int Idx = M / Scale;
5838 if (Idx != 0 && !VT.isFloatingPoint())
5839 Mask[i] = SM_SentinelUndef;
5840 else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
5841 Mask[i] = SM_SentinelZero;
5842 continue;
5843 }
5844
5845 // Attempt to extract from the source's constant bits.
5846 if (IsSrcConstant[SrcIdx]) {
5847 if (UndefSrcElts[SrcIdx][M])
5848 Mask[i] = SM_SentinelUndef;
5849 else if (SrcEltBits[SrcIdx][M] == 0)
5850 Mask[i] = SM_SentinelZero;
5851 }
5852 }
5853
5854 assert(VT.getVectorNumElements() == Mask.size() &&((VT.getVectorNumElements() == Mask.size() && "Different mask size from vector size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5855, __PRETTY_FUNCTION__))
5855 "Different mask size from vector size!")((VT.getVectorNumElements() == Mask.size() && "Different mask size from vector size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5855, __PRETTY_FUNCTION__))
;
5856 return true;
5857}
5858
5859// Attempt to decode ops that could be represented as a shuffle mask.
5860// The decoded shuffle mask may contain a different number of elements to the
5861// destination value type.
5862static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
5863 SmallVectorImpl<SDValue> &Ops) {
5864 Mask.clear();
5865 Ops.clear();
5866
5867 MVT VT = N.getSimpleValueType();
5868 unsigned NumElts = VT.getVectorNumElements();
5869 unsigned NumSizeInBits = VT.getSizeInBits();
5870 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
5871 assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&(((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0
&& "Expected byte aligned value types") ? static_cast
<void> (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5872, __PRETTY_FUNCTION__))
5872 "Expected byte aligned value types")(((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0
&& "Expected byte aligned value types") ? static_cast
<void> (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5872, __PRETTY_FUNCTION__))
;
5873
5874 unsigned Opcode = N.getOpcode();
5875 switch (Opcode) {
5876 case ISD::AND:
5877 case X86ISD::ANDNP: {
5878 // Attempt to decode as a per-byte mask.
5879 APInt UndefElts;
5880 SmallVector<APInt, 32> EltBits;
5881 SDValue N0 = N.getOperand(0);
5882 SDValue N1 = N.getOperand(1);
5883 bool IsAndN = (X86ISD::ANDNP == Opcode);
5884 uint64_t ZeroMask = IsAndN ? 255 : 0;
5885 if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
5886 return false;
5887 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
5888 if (UndefElts[i]) {
5889 Mask.push_back(SM_SentinelUndef);
5890 continue;
5891 }
5892 uint64_t ByteBits = EltBits[i].getZExtValue();
5893 if (ByteBits != 0 && ByteBits != 255)
5894 return false;
5895 Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
5896 }
5897 Ops.push_back(IsAndN ? N1 : N0);
5898 return true;
5899 }
5900 case ISD::SCALAR_TO_VECTOR: {
5901 // Match against a scalar_to_vector of an extract from a similar vector.
5902 SDValue N0 = N.getOperand(0);
5903 if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5904 N0.getOperand(0).getValueType() != VT ||
5905 !isa<ConstantSDNode>(N0.getOperand(1)) ||
5906 NumElts <= N0.getConstantOperandVal(1) ||
5907 !N->isOnlyUserOf(N0.getNode()))
5908 return false;
5909 Ops.push_back(N0.getOperand(0));
5910 Mask.push_back(N0.getConstantOperandVal(1));
5911 Mask.append(NumElts - 1, SM_SentinelUndef);
5912 return true;
5913 }
5914 case X86ISD::PINSRB:
5915 case X86ISD::PINSRW: {
5916 SDValue InVec = N.getOperand(0);
5917 SDValue InScl = N.getOperand(1);
5918 uint64_t InIdx = N.getConstantOperandVal(2);
5919 assert(InIdx < NumElts && "Illegal insertion index")((InIdx < NumElts && "Illegal insertion index") ? static_cast
<void> (0) : __assert_fail ("InIdx < NumElts && \"Illegal insertion index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5919, __PRETTY_FUNCTION__))
;
5920
5921 // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
5922 if (X86::isZeroNode(InScl)) {
5923 Ops.push_back(InVec);
5924 for (unsigned i = 0; i != NumElts; ++i)
5925 Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
5926 return true;
5927 }
5928
5929 // Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern.
5930 // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
5931 unsigned ExOp =
5932 (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
5933 if (InScl.getOpcode() != ISD::AssertZext ||
5934 InScl.getOperand(0).getOpcode() != ExOp)
5935 return false;
5936
5937 SDValue ExVec = InScl.getOperand(0).getOperand(0);
5938 uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1);
5939 assert(ExIdx < NumElts && "Illegal extraction index")((ExIdx < NumElts && "Illegal extraction index") ?
static_cast<void> (0) : __assert_fail ("ExIdx < NumElts && \"Illegal extraction index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 5939, __PRETTY_FUNCTION__))
;
5940 Ops.push_back(InVec);
5941 Ops.push_back(ExVec);
5942 for (unsigned i = 0; i != NumElts; ++i)
5943 Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
5944 return true;
5945 }
5946 case X86ISD::VSHLI:
5947 case X86ISD::VSRLI: {
5948 uint64_t ShiftVal = N.getConstantOperandVal(1);
5949 // Out of range bit shifts are guaranteed to be zero.
5950 if (NumBitsPerElt <= ShiftVal) {
5951 Mask.append(NumElts, SM_SentinelZero);
5952 return true;
5953 }
5954
5955 // We can only decode 'whole byte' bit shifts as shuffles.
5956 if ((ShiftVal % 8) != 0)
5957 break;
5958
5959 uint64_t ByteShift = ShiftVal / 8;
5960 unsigned NumBytes = NumSizeInBits / 8;
5961 unsigned NumBytesPerElt = NumBitsPerElt / 8;
5962 Ops.push_back(N.getOperand(0));
5963
5964 // Clear mask to all zeros and insert the shifted byte indices.
5965 Mask.append(NumBytes, SM_SentinelZero);
5966
5967 if (X86ISD::VSHLI == Opcode) {
5968 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
5969 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
5970 Mask[i + j] = i + j - ByteShift;
5971 } else {
5972 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
5973 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
5974 Mask[i + j - ByteShift] = i + j;
5975 }
5976 return true;
5977 }
5978 case ISD::ZERO_EXTEND_VECTOR_INREG:
5979 case X86ISD::VZEXT: {
5980 // TODO - add support for VPMOVZX with smaller input vector types.
5981 SDValue Src = N.getOperand(0);
5982 MVT SrcVT = Src.getSimpleValueType();
5983 if (NumSizeInBits != SrcVT.getSizeInBits())
5984 break;
5985 DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
5986 Ops.push_back(Src);
5987 return true;
5988 }
5989 }
5990
5991 return false;
5992}
5993
5994/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
5995static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
5996 SmallVectorImpl<int> &Mask) {
5997 int MaskWidth = Mask.size();
5998 SmallVector<SDValue, 16> UsedInputs;
5999 for (int i = 0, e = Inputs.size(); i < e; ++i) {
6000 int lo = UsedInputs.size() * MaskWidth;
6001 int hi = lo + MaskWidth;
6002 if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
6003 UsedInputs.push_back(Inputs[i]);
6004 continue;
6005 }
6006 for (int &M : Mask)
6007 if (lo <= M)
6008 M -= MaskWidth;
6009 }
6010 Inputs = UsedInputs;
6011}
6012
6013/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
6014/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
6015/// remaining input indices in case we now have a unary shuffle and adjust the
6016/// inputs accordingly.
6017/// Returns true if the target shuffle mask was decoded.
6018static bool resolveTargetShuffleInputs(SDValue Op,
6019 SmallVectorImpl<SDValue> &Inputs,
6020 SmallVectorImpl<int> &Mask) {
6021 if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
6022 if (!getFauxShuffleMask(Op, Mask, Inputs))
6023 return false;
6024
6025 resolveTargetShuffleInputsAndMask(Inputs, Mask);
6026 return true;
6027}
6028
6029/// Returns the scalar element that will make up the ith
6030/// element of the result of the vector shuffle.
6031static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
6032 unsigned Depth) {
6033 if (Depth == 6)
6034 return SDValue(); // Limit search depth.
6035
6036 SDValue V = SDValue(N, 0);
6037 EVT VT = V.getValueType();
6038 unsigned Opcode = V.getOpcode();
6039
6040 // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
6041 if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
6042 int Elt = SV->getMaskElt(Index);
6043
6044 if (Elt < 0)
6045 return DAG.getUNDEF(VT.getVectorElementType());
6046
6047 unsigned NumElems = VT.getVectorNumElements();
6048 SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
6049 : SV->getOperand(1);
6050 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
6051 }
6052
6053 // Recurse into target specific vector shuffles to find scalars.
6054 if (isTargetShuffle(Opcode)) {
6055 MVT ShufVT = V.getSimpleValueType();
6056 MVT ShufSVT = ShufVT.getVectorElementType();
6057 int NumElems = (int)ShufVT.getVectorNumElements();
6058 SmallVector<int, 16> ShuffleMask;
6059 SmallVector<SDValue, 16> ShuffleOps;
6060 bool IsUnary;
6061
6062 if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
6063 return SDValue();
6064
6065 int Elt = ShuffleMask[Index];
6066 if (Elt == SM_SentinelZero)
6067 return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
6068 : DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
6069 if (Elt == SM_SentinelUndef)
6070 return DAG.getUNDEF(ShufSVT);
6071
6072 assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range")((0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"
) ? static_cast<void> (0) : __assert_fail ("0 <= Elt && Elt < (2*NumElems) && \"Shuffle index out of range\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6072, __PRETTY_FUNCTION__))
;
6073 SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
6074 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
6075 Depth+1);
6076 }
6077
6078 // Actual nodes that may contain scalar elements
6079 if (Opcode == ISD::BITCAST) {
6080 V = V.getOperand(0);
6081 EVT SrcVT = V.getValueType();
6082 unsigned NumElems = VT.getVectorNumElements();
6083
6084 if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
6085 return SDValue();
6086 }
6087
6088 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
6089 return (Index == 0) ? V.getOperand(0)
6090 : DAG.getUNDEF(VT.getVectorElementType());
6091
6092 if (V.getOpcode() == ISD::BUILD_VECTOR)
6093 return V.getOperand(Index);
6094
6095 return SDValue();
6096}
6097
6098/// Custom lower build_vector of v16i8.
6099static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
6100 unsigned NumNonZero, unsigned NumZero,
6101 SelectionDAG &DAG,
6102 const X86Subtarget &Subtarget) {
6103 if (NumNonZero > 8)
6104 return SDValue();
6105
6106 SDLoc dl(Op);
6107 SDValue V;
6108 bool First = true;
6109
6110 // SSE4.1 - use PINSRB to insert each byte directly.
6111 if (Subtarget.hasSSE41()) {
6112 for (unsigned i = 0; i < 16; ++i) {
6113 bool IsNonZero = (NonZeros & (1 << i)) != 0;
6114 if (IsNonZero) {
6115 // If the build vector contains zeros or our first insertion is not the
6116 // first index then insert into zero vector to break any register
6117 // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
6118 if (First) {
6119 First = false;
6120 if (NumZero || 0 != i)
6121 V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
6122 else {
6123 assert(0 == i && "Expected insertion into zero-index")((0 == i && "Expected insertion into zero-index") ? static_cast
<void> (0) : __assert_fail ("0 == i && \"Expected insertion into zero-index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6123, __PRETTY_FUNCTION__))
;
6124 V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
6125 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6126 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6127 V = DAG.getBitcast(MVT::v16i8, V);
6128 continue;
6129 }
6130 }
6131 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
6132 Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
6133 }
6134 }
6135
6136 return V;
6137 }
6138
6139 // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
6140 for (unsigned i = 0; i < 16; ++i) {
6141 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
6142 if (ThisIsNonZero && First) {
6143 if (NumZero)
6144 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
6145 else
6146 V = DAG.getUNDEF(MVT::v8i16);
6147 First = false;
6148 }
6149
6150 if ((i & 1) != 0) {
6151 // FIXME: Investigate extending to i32 instead of just i16.
6152 // FIXME: Investigate combining the first 4 bytes as a i32 instead.
6153 SDValue ThisElt, LastElt;
6154 bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
6155 if (LastIsNonZero) {
6156 LastElt =
6157 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
6158 }
6159 if (ThisIsNonZero) {
6160 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
6161 ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
6162 DAG.getConstant(8, dl, MVT::i8));
6163 if (LastIsNonZero)
6164 ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
6165 } else
6166 ThisElt = LastElt;
6167
6168 if (ThisElt) {
6169 if (1 == i) {
6170 V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
6171 : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
6172 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6173 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6174 V = DAG.getBitcast(MVT::v8i16, V);
6175 } else {
6176 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
6177 DAG.getIntPtrConstant(i / 2, dl));
6178 }
6179 }
6180 }
6181 }
6182
6183 return DAG.getBitcast(MVT::v16i8, V);
6184}
6185
6186/// Custom lower build_vector of v8i16.
6187static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
6188 unsigned NumNonZero, unsigned NumZero,
6189 SelectionDAG &DAG,
6190 const X86Subtarget &Subtarget) {
6191 if (NumNonZero > 4)
6192 return SDValue();
6193
6194 SDLoc dl(Op);
6195 SDValue V;
6196 bool First = true;
6197 for (unsigned i = 0; i < 8; ++i) {
6198 bool IsNonZero = (NonZeros & (1 << i)) != 0;
6199 if (IsNonZero) {
6200 // If the build vector contains zeros or our first insertion is not the
6201 // first index then insert into zero vector to break any register
6202 // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
6203 if (First) {
6204 First = false;
6205 if (NumZero || 0 != i)
6206 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
6207 else {
6208 assert(0 == i && "Expected insertion into zero-index")((0 == i && "Expected insertion into zero-index") ? static_cast
<void> (0) : __assert_fail ("0 == i && \"Expected insertion into zero-index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6208, __PRETTY_FUNCTION__))
;
6209 V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
6210 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6211 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6212 V = DAG.getBitcast(MVT::v8i16, V);
6213 continue;
6214 }
6215 }
6216 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
6217 Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
6218 }
6219 }
6220
6221 return V;
6222}
6223
6224/// Custom lower build_vector of v4i32 or v4f32.
6225static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
6226 const X86Subtarget &Subtarget) {
6227 // Find all zeroable elements.
6228 std::bitset<4> Zeroable;
6229 for (int i=0; i < 4; ++i) {
6230 SDValue Elt = Op->getOperand(i);
6231 Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
6232 }
6233 assert(Zeroable.size() - Zeroable.count() > 1 &&((Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6234, __PRETTY_FUNCTION__))
6234 "We expect at least two non-zero elements!")((Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6234, __PRETTY_FUNCTION__))
;
6235
6236 // We only know how to deal with build_vector nodes where elements are either
6237 // zeroable or extract_vector_elt with constant index.
6238 SDValue FirstNonZero;
6239 unsigned FirstNonZeroIdx;
6240 for (unsigned i=0; i < 4; ++i) {
6241 if (Zeroable[i])
6242 continue;
6243 SDValue Elt = Op->getOperand(i);
6244 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6245 !isa<ConstantSDNode>(Elt.getOperand(1)))
6246 return SDValue();
6247 // Make sure that this node is extracting from a 128-bit vector.
6248 MVT VT = Elt.getOperand(0).getSimpleValueType();
6249 if (!VT.is128BitVector())
6250 return SDValue();
6251 if (!FirstNonZero.getNode()) {
6252 FirstNonZero = Elt;
6253 FirstNonZeroIdx = i;
6254 }
6255 }
6256
6257 assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!")((FirstNonZero.getNode() && "Unexpected build vector of all zeros!"
) ? static_cast<void> (0) : __assert_fail ("FirstNonZero.getNode() && \"Unexpected build vector of all zeros!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6257, __PRETTY_FUNCTION__))
;
6258 SDValue V1 = FirstNonZero.getOperand(0);
6259 MVT VT = V1.getSimpleValueType();
6260
6261 // See if this build_vector can be lowered as a blend with zero.
6262 SDValue Elt;
6263 unsigned EltMaskIdx, EltIdx;
6264 int Mask[4];
6265 for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
6266 if (Zeroable[EltIdx]) {
6267 // The zero vector will be on the right hand side.
6268 Mask[EltIdx] = EltIdx+4;
6269 continue;
6270 }
6271
6272 Elt = Op->getOperand(EltIdx);
6273 // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index.
6274 EltMaskIdx = cast<ConstantSDNode>(Elt.getOperand(1))->getZExtValue();
6275 if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
6276 break;
6277 Mask[EltIdx] = EltIdx;
6278 }
6279
6280 if (EltIdx == 4) {
6281 // Let the shuffle legalizer deal with blend operations.
6282 SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
6283 if (V1.getSimpleValueType() != VT)
6284 V1 = DAG.getBitcast(VT, V1);
6285 return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
6286 }
6287
6288 // See if we can lower this build_vector to a INSERTPS.
6289 if (!Subtarget.hasSSE41())
6290 return SDValue();
6291
6292 SDValue V2 = Elt.getOperand(0);
6293 if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
6294 V1 = SDValue();
6295
6296 bool CanFold = true;
6297 for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
6298 if (Zeroable[i])
6299 continue;
6300
6301 SDValue Current = Op->getOperand(i);
6302 SDValue SrcVector = Current->getOperand(0);
6303 if (!V1.getNode())
6304 V1 = SrcVector;
6305 CanFold = SrcVector == V1 &&
6306 cast<ConstantSDNode>(Current.getOperand(1))->getZExtValue() == i;
6307 }
6308
6309 if (!CanFold)
6310 return SDValue();
6311
6312 assert(V1.getNode() && "Expected at least two non-zero elements!")((V1.getNode() && "Expected at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("V1.getNode() && \"Expected at least two non-zero elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6312, __PRETTY_FUNCTION__))
;
6313 if (V1.getSimpleValueType() != MVT::v4f32)
6314 V1 = DAG.getBitcast(MVT::v4f32, V1);
6315 if (V2.getSimpleValueType() != MVT::v4f32)
6316 V2 = DAG.getBitcast(MVT::v4f32, V2);
6317
6318 // Ok, we can emit an INSERTPS instruction.
6319 unsigned ZMask = Zeroable.to_ulong();
6320
6321 unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
6322 assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!")(((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"
) ? static_cast<void> (0) : __assert_fail ("(InsertPSMask & ~0xFFu) == 0 && \"Invalid mask!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6322, __PRETTY_FUNCTION__))
;
6323 SDLoc DL(Op);
6324 SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
6325 DAG.getIntPtrConstant(InsertPSMask, DL));
6326 return DAG.getBitcast(VT, Result);
6327}
6328
6329/// Return a vector logical shift node.
6330static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
6331 SelectionDAG &DAG, const TargetLowering &TLI,
6332 const SDLoc &dl) {
6333 assert(VT.is128BitVector() && "Unknown type for VShift")((VT.is128BitVector() && "Unknown type for VShift") ?
static_cast<void> (0) : __assert_fail ("VT.is128BitVector() && \"Unknown type for VShift\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6333, __PRETTY_FUNCTION__))
;
6334 MVT ShVT = MVT::v16i8;
6335 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
6336 SrcOp = DAG.getBitcast(ShVT, SrcOp);
6337 MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
6338 assert(NumBits % 8 == 0 && "Only support byte sized shifts")((NumBits % 8 == 0 && "Only support byte sized shifts"
) ? static_cast<void> (0) : __assert_fail ("NumBits % 8 == 0 && \"Only support byte sized shifts\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6338, __PRETTY_FUNCTION__))
;
6339 SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
6340 return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
6341}
6342
6343static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
6344 SelectionDAG &DAG) {
6345
6346 // Check if the scalar load can be widened into a vector load. And if
6347 // the address is "base + cst" see if the cst can be "absorbed" into
6348 // the shuffle mask.
6349 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
6350 SDValue Ptr = LD->getBasePtr();
6351 if (!ISD::isNormalLoad(LD) || LD->isVolatile())
6352 return SDValue();
6353 EVT PVT = LD->getValueType(0);
6354 if (PVT != MVT::i32 && PVT != MVT::f32)
6355 return SDValue();
6356
6357 int FI = -1;
6358 int64_t Offset = 0;
6359 if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
6360 FI = FINode->getIndex();
6361 Offset = 0;
6362 } else if (DAG.isBaseWithConstantOffset(Ptr) &&
6363 isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
6364 FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
6365 Offset = Ptr.getConstantOperandVal(1);
6366 Ptr = Ptr.getOperand(0);
6367 } else {
6368 return SDValue();
6369 }
6370
6371 // FIXME: 256-bit vector instructions don't require a strict alignment,
6372 // improve this code to support it better.
6373 unsigned RequiredAlign = VT.getSizeInBits()/8;
6374 SDValue Chain = LD->getChain();
6375 // Make sure the stack object alignment is at least 16 or 32.
6376 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6377 if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
6378 if (MFI.isFixedObjectIndex(FI)) {
6379 // Can't change the alignment. FIXME: It's possible to compute
6380 // the exact stack offset and reference FI + adjust offset instead.
6381 // If someone *really* cares about this. That's the way to implement it.
6382 return SDValue();
6383 } else {
6384 MFI.setObjectAlignment(FI, RequiredAlign);
6385 }
6386 }
6387
6388 // (Offset % 16 or 32) must be multiple of 4. Then address is then
6389 // Ptr + (Offset & ~15).
6390 if (Offset < 0)
6391 return SDValue();
6392 if ((Offset % RequiredAlign) & 3)
6393 return SDValue();
6394 int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
6395 if (StartOffset) {
6396 SDLoc DL(Ptr);
6397 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6398 DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
6399 }
6400
6401 int EltNo = (Offset - StartOffset) >> 2;
6402 unsigned NumElems = VT.getVectorNumElements();
6403
6404 EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
6405 SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
6406 LD->getPointerInfo().getWithOffset(StartOffset));
6407
6408 SmallVector<int, 8> Mask(NumElems, EltNo);
6409
6410 return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
6411 }
6412
6413 return SDValue();
6414}
6415
6416/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
6417/// elements can be replaced by a single large load which has the same value as
6418/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
6419///
6420/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
6421static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
6422 const SDLoc &DL, SelectionDAG &DAG,
6423 bool isAfterLegalize) {
6424 unsigned NumElems = Elts.size();
6425
6426 int LastLoadedElt = -1;
6427 SmallBitVector LoadMask(NumElems, false);
6428 SmallBitVector ZeroMask(NumElems, false);
6429 SmallBitVector UndefMask(NumElems, false);
6430
6431 // For each element in the initializer, see if we've found a load, zero or an
6432 // undef.
6433 for (unsigned i = 0; i < NumElems; ++i) {
6434 SDValue Elt = peekThroughBitcasts(Elts[i]);
6435 if (!Elt.getNode())
6436 return SDValue();
6437
6438 if (Elt.isUndef())
6439 UndefMask[i] = true;
6440 else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
6441 ZeroMask[i] = true;
6442 else if (ISD::isNON_EXTLoad(Elt.getNode())) {
6443 LoadMask[i] = true;
6444 LastLoadedElt = i;
6445 // Each loaded element must be the correct fractional portion of the
6446 // requested vector load.
6447 if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
6448 return SDValue();
6449 } else
6450 return SDValue();
6451 }
6452 assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&(((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks") ? static_cast<void> (0) : __assert_fail
("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6453, __PRETTY_FUNCTION__))
6453 "Incomplete element masks")(((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks") ? static_cast<void> (0) : __assert_fail
("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6453, __PRETTY_FUNCTION__))
;
6454
6455 // Handle Special Cases - all undef or undef/zero.
6456 if (UndefMask.count() == NumElems)
6457 return DAG.getUNDEF(VT);
6458
6459 // FIXME: Should we return this as a BUILD_VECTOR instead?
6460 if ((ZeroMask | UndefMask).count() == NumElems)
6461 return VT.isInteger() ? DAG.getConstant(0, DL, VT)
6462 : DAG.getConstantFP(0.0, DL, VT);
6463
6464 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6465 int FirstLoadedElt = LoadMask.find_first();
6466 SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
6467 LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
6468 EVT LDBaseVT = EltBase.getValueType();
6469
6470 // Consecutive loads can contain UNDEFS but not ZERO elements.
6471 // Consecutive loads with UNDEFs and ZEROs elements require a
6472 // an additional shuffle stage to clear the ZERO elements.
6473 bool IsConsecutiveLoad = true;
6474 bool IsConsecutiveLoadWithZeros = true;
6475 for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
6476 if (LoadMask[i]) {
6477 SDValue Elt = peekThroughBitcasts(Elts[i]);
6478 LoadSDNode *LD = cast<LoadSDNode>(Elt);
6479 if (!DAG.areNonVolatileConsecutiveLoads(
6480 LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
6481 i - FirstLoadedElt)) {
6482 IsConsecutiveLoad = false;
6483 IsConsecutiveLoadWithZeros = false;
6484 break;
6485 }
6486 } else if (ZeroMask[i]) {
6487 IsConsecutiveLoad = false;
6488 }
6489 }
6490
6491 auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) {
6492 auto MMOFlags = LDBase->getMemOperand()->getFlags();
6493 assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&((!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."
) ? static_cast<void> (0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6494, __PRETTY_FUNCTION__))
6494 "Cannot merge volatile loads.")((!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."
) ? static_cast<void> (0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6494, __PRETTY_FUNCTION__))
;
6495 SDValue NewLd =
6496 DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
6497 LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
6498
6499 if (LDBase->hasAnyUseOfValue(1)) {
6500 SDValue NewChain =
6501 DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
6502 SDValue(NewLd.getNode(), 1));
6503 DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
6504 DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
6505 SDValue(NewLd.getNode(), 1));
6506 }
6507
6508 return NewLd;
6509 };
6510
6511 // LOAD - all consecutive load/undefs (must start/end with a load).
6512 // If we have found an entire vector of loads and undefs, then return a large
6513 // load of the entire vector width starting at the base pointer.
6514 // If the vector contains zeros, then attempt to shuffle those elements.
6515 if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
6516 (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
6517 assert(LDBase && "Did not find base load for merging consecutive loads")((LDBase && "Did not find base load for merging consecutive loads"
) ? static_cast<void> (0) : __assert_fail ("LDBase && \"Did not find base load for merging consecutive loads\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6517, __PRETTY_FUNCTION__))
;
6518 EVT EltVT = LDBase->getValueType(0);
6519 // Ensure that the input vector size for the merged loads matches the
6520 // cumulative size of the input elements.
6521 if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
6522 return SDValue();
6523
6524 if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
6525 return SDValue();
6526
6527 if (IsConsecutiveLoad)
6528 return CreateLoad(VT, LDBase);
6529
6530 // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
6531 // vector and a zero vector to clear out the zero elements.
6532 if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
6533 SmallVector<int, 4> ClearMask(NumElems, -1);
6534 for (unsigned i = 0; i < NumElems; ++i) {
6535 if (ZeroMask[i])
6536 ClearMask[i] = i + NumElems;
6537 else if (LoadMask[i])
6538 ClearMask[i] = i;
6539 }
6540 SDValue V = CreateLoad(VT, LDBase);
6541 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
6542 : DAG.getConstantFP(0.0, DL, VT);
6543 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
6544 }
6545 }
6546
6547 int LoadSize =
6548 (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
6549
6550 // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
6551 if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
6552 (LoadSize == 32 || LoadSize == 64) &&
6553 ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
6554 MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
6555 : MVT::getIntegerVT(LoadSize);
6556 MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
6557 if (TLI.isTypeLegal(VecVT)) {
6558 SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
6559 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
6560 SDValue ResNode =
6561 DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
6562 LDBase->getPointerInfo(),
6563 LDBase->getAlignment(),
6564 false/*isVolatile*/, true/*ReadMem*/,
6565 false/*WriteMem*/);
6566
6567 // Make sure the newly-created LOAD is in the same position as LDBase in
6568 // terms of dependency. We create a TokenFactor for LDBase and ResNode,
6569 // and update uses of LDBase's output chain to use the TokenFactor.
6570 if (LDBase->hasAnyUseOfValue(1)) {
6571 SDValue NewChain =
6572 DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
6573 SDValue(ResNode.getNode(), 1));
6574 DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
6575 DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
6576 SDValue(ResNode.getNode(), 1));
6577 }
6578
6579 return DAG.getBitcast(VT, ResNode);
6580 }
6581 }
6582
6583 return SDValue();
6584}
6585
6586static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
6587 unsigned SplatBitSize, LLVMContext &C) {
6588 unsigned ScalarSize = VT.getScalarSizeInBits();
6589 unsigned NumElm = SplatBitSize / ScalarSize;
6590
6591 SmallVector<Constant *, 32> ConstantVec;
6592 for (unsigned i = 0; i < NumElm; i++) {
6593 APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
6594 Constant *Const;
6595 if (VT.isFloatingPoint()) {
6596 assert((ScalarSize == 32 || ScalarSize == 64) &&(((ScalarSize == 32 || ScalarSize == 64) && "Unsupported floating point scalar size"
) ? static_cast<void> (0) : __assert_fail ("(ScalarSize == 32 || ScalarSize == 64) && \"Unsupported floating point scalar size\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6597, __PRETTY_FUNCTION__))
6597 "Unsupported floating point scalar size")(((ScalarSize == 32 || ScalarSize == 64) && "Unsupported floating point scalar size"
) ? static_cast<void> (0) : __assert_fail ("(ScalarSize == 32 || ScalarSize == 64) && \"Unsupported floating point scalar size\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6597, __PRETTY_FUNCTION__))
;
6598 if (ScalarSize == 32)
6599 Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
6600 else
6601 Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
6602 } else
6603 Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
6604 ConstantVec.push_back(Const);
6605 }
6606 return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
6607}
6608
6609static bool isUseOfShuffle(SDNode *N) {
6610 for (auto *U : N->uses()) {
6611 if (isTargetShuffle(U->getOpcode()))
6612 return true;
6613 if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
6614 return isUseOfShuffle(U);
6615 }
6616 return false;
6617}
6618
6619/// Attempt to use the vbroadcast instruction to generate a splat value for the
6620/// following cases:
6621/// 1. A splat BUILD_VECTOR which uses:
6622/// a. A single scalar load, or a constant.
6623/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
6624/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
6625/// a scalar load, or a constant.
6626///
6627/// The VBROADCAST node is returned when a pattern is found,
6628/// or SDValue() otherwise.
6629static SDValue LowerVectorBroadcast(BuildVectorSDNode *BVOp, const X86Subtarget &Subtarget,
6630 SelectionDAG &DAG) {
6631 // VBROADCAST requires AVX.
6632 // TODO: Splats could be generated for non-AVX CPUs using SSE
6633 // instructions, but there's less potential gain for only 128-bit vectors.
6634 if (!Subtarget.hasAVX())
6635 return SDValue();
6636
6637 MVT VT = BVOp->getSimpleValueType(0);
6638 SDLoc dl(BVOp);
6639
6640 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Unsupported vector type for broadcast.") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6641, __PRETTY_FUNCTION__))
6641 "Unsupported vector type for broadcast.")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Unsupported vector type for broadcast.") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6641, __PRETTY_FUNCTION__))
;
6642
6643 BitVector UndefElements;
6644 SDValue Ld = BVOp->getSplatValue(&UndefElements);
6645
6646 // We need a splat of a single value to use broadcast, and it doesn't
6647 // make any sense if the value is only in one element of the vector.
6648 if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
6649 APInt SplatValue, Undef;
6650 unsigned SplatBitSize;
6651 bool HasUndef;
6652 // Check if this is a repeated constant pattern suitable for broadcasting.
6653 if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
6654 SplatBitSize > VT.getScalarSizeInBits() &&
6655 SplatBitSize < VT.getSizeInBits()) {
6656 // Avoid replacing with broadcast when it's a use of a shuffle
6657 // instruction to preserve the present custom lowering of shuffles.
6658 if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
6659 return SDValue();
6660 // replace BUILD_VECTOR with broadcast of the repeated constants.
6661 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6662 LLVMContext *Ctx = DAG.getContext();
6663 MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
6664 if (Subtarget.hasAVX()) {
6665 if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
6666 !(SplatBitSize == 64 && Subtarget.is32Bit())) {
6667 // Splatted value can fit in one INTEGER constant in constant pool.
6668 // Load the constant and broadcast it.
6669 MVT CVT = MVT::getIntegerVT(SplatBitSize);
6670 Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
6671 Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
6672 SDValue CP = DAG.getConstantPool(C, PVT);
6673 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6674
6675 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6676 Ld = DAG.getLoad(
6677 CVT, dl, DAG.getEntryNode(), CP,
6678 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6679 Alignment);
6680 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6681 MVT::getVectorVT(CVT, Repeat), Ld);
6682 return DAG.getBitcast(VT, Brdcst);
6683 } else if (SplatBitSize == 32 || SplatBitSize == 64) {
6684 // Splatted value can fit in one FLOAT constant in constant pool.
6685 // Load the constant and broadcast it.
6686 // AVX have support for 32 and 64 bit broadcast for floats only.
6687 // No 64bit integer in 32bit subtarget.
6688 MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
6689 Constant *C = SplatBitSize == 32
6690 ? ConstantFP::get(Type::getFloatTy(*Ctx),
6691 SplatValue.bitsToFloat())
6692 : ConstantFP::get(Type::getDoubleTy(*Ctx),
6693 SplatValue.bitsToDouble());
6694 SDValue CP = DAG.getConstantPool(C, PVT);
6695 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6696
6697 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6698 Ld = DAG.getLoad(
6699 CVT, dl, DAG.getEntryNode(), CP,
6700 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6701 Alignment);
6702 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6703 MVT::getVectorVT(CVT, Repeat), Ld);
6704 return DAG.getBitcast(VT, Brdcst);
6705 } else if (SplatBitSize > 64) {
6706 // Load the vector of constants and broadcast it.
6707 MVT CVT = VT.getScalarType();
6708 Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
6709 *Ctx);
6710 SDValue VCP = DAG.getConstantPool(VecC, PVT);
6711 unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
6712 unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
6713 Ld = DAG.getLoad(
6714 MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
6715 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6716 Alignment);
6717 SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
6718 return DAG.getBitcast(VT, Brdcst);
6719 }
6720 }
6721 }
6722 return SDValue();
6723 }
6724
6725 bool ConstSplatVal =
6726 (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
6727
6728 // Make sure that all of the users of a non-constant load are from the
6729 // BUILD_VECTOR node.
6730 if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
6731 return SDValue();
6732
6733 unsigned ScalarSize = Ld.getValueSizeInBits();
6734 bool IsGE256 = (VT.getSizeInBits() >= 256);
6735
6736 // When optimizing for size, generate up to 5 extra bytes for a broadcast
6737 // instruction to save 8 or more bytes of constant pool data.
6738 // TODO: If multiple splats are generated to load the same constant,
6739 // it may be detrimental to overall size. There needs to be a way to detect
6740 // that condition to know if this is truly a size win.
6741 bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
6742
6743 // Handle broadcasting a single constant scalar from the constant pool
6744 // into a vector.
6745 // On Sandybridge (no AVX2), it is still better to load a constant vector
6746 // from the constant pool and not to broadcast it from a scalar.
6747 // But override that restriction when optimizing for size.
6748 // TODO: Check if splatting is recommended for other AVX-capable CPUs.
6749 if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
6750 EVT CVT = Ld.getValueType();
6751 assert(!CVT.isVector() && "Must not broadcast a vector type")((!CVT.isVector() && "Must not broadcast a vector type"
) ? static_cast<void> (0) : __assert_fail ("!CVT.isVector() && \"Must not broadcast a vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6751, __PRETTY_FUNCTION__))
;
6752
6753 // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
6754 // For size optimization, also splat v2f64 and v2i64, and for size opt
6755 // with AVX2, also splat i8 and i16.
6756 // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
6757 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6758 (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
6759 const Constant *C = nullptr;
6760 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
6761 C = CI->getConstantIntValue();
6762 else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
6763 C = CF->getConstantFPValue();
6764
6765 assert(C && "Invalid constant type")((C && "Invalid constant type") ? static_cast<void
> (0) : __assert_fail ("C && \"Invalid constant type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6765, __PRETTY_FUNCTION__))
;
6766
6767 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6768 SDValue CP =
6769 DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
6770 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6771 Ld = DAG.getLoad(
6772 CVT, dl, DAG.getEntryNode(), CP,
6773 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6774 Alignment);
6775
6776 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6777 }
6778 }
6779
6780 bool IsLoad = ISD::isNormalLoad(Ld.getNode());
6781
6782 // Handle AVX2 in-register broadcasts.
6783 if (!IsLoad && Subtarget.hasInt256() &&
6784 (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
6785 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6786
6787 // The scalar source must be a normal load.
6788 if (!IsLoad)
6789 return SDValue();
6790
6791 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6792 (Subtarget.hasVLX() && ScalarSize == 64))
6793 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6794
6795 // The integer check is needed for the 64-bit into 128-bit so it doesn't match
6796 // double since there is no vbroadcastsd xmm
6797 if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
6798 if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
6799 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6800 }
6801
6802 // Unsupported broadcast.
6803 return SDValue();
6804}
6805
6806/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
6807/// underlying vector and index.
6808///
6809/// Modifies \p ExtractedFromVec to the real vector and returns the real
6810/// index.
6811static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
6812 SDValue ExtIdx) {
6813 int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
6814 if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
6815 return Idx;
6816
6817 // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
6818 // lowered this:
6819 // (extract_vector_elt (v8f32 %vreg1), Constant<6>)
6820 // to:
6821 // (extract_vector_elt (vector_shuffle<2,u,u,u>
6822 // (extract_subvector (v8f32 %vreg0), Constant<4>),
6823 // undef)
6824 // Constant<0>)
6825 // In this case the vector is the extract_subvector expression and the index
6826 // is 2, as specified by the shuffle.
6827 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
6828 SDValue ShuffleVec = SVOp->getOperand(0);
6829 MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
6830 assert(ShuffleVecVT.getVectorElementType() ==((ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType
().getVectorElementType()) ? static_cast<void> (0) : __assert_fail
("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6831, __PRETTY_FUNCTION__))
6831 ExtractedFromVec.getSimpleValueType().getVectorElementType())((ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType
().getVectorElementType()) ? static_cast<void> (0) : __assert_fail
("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6831, __PRETTY_FUNCTION__))
;
6832
6833 int ShuffleIdx = SVOp->getMaskElt(Idx);
6834 if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
6835 ExtractedFromVec = ShuffleVec;
6836 return ShuffleIdx;
6837 }
6838 return Idx;
6839}
6840
6841static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
6842 MVT VT = Op.getSimpleValueType();
6843
6844 // Skip if insert_vec_elt is not supported.
6845 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6846 if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
6847 return SDValue();
6848
6849 SDLoc DL(Op);
6850 unsigned NumElems = Op.getNumOperands();
6851
6852 SDValue VecIn1;
6853 SDValue VecIn2;
6854 SmallVector<unsigned, 4> InsertIndices;
6855 SmallVector<int, 8> Mask(NumElems, -1);
6856
6857 for (unsigned i = 0; i != NumElems; ++i) {
6858 unsigned Opc = Op.getOperand(i).getOpcode();
6859
6860 if (Opc == ISD::UNDEF)
6861 continue;
6862
6863 if (Opc != ISD::EXTRACT_VECTOR_ELT) {
6864 // Quit if more than 1 elements need inserting.
6865 if (InsertIndices.size() > 1)
6866 return SDValue();
6867
6868 InsertIndices.push_back(i);
6869 continue;
6870 }
6871
6872 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
6873 SDValue ExtIdx = Op.getOperand(i).getOperand(1);
6874
6875 // Quit if non-constant index.
6876 if (!isa<ConstantSDNode>(ExtIdx))
6877 return SDValue();
6878 int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
6879
6880 // Quit if extracted from vector of different type.
6881 if (ExtractedFromVec.getValueType() != VT)
6882 return SDValue();
6883
6884 if (!VecIn1.getNode())
6885 VecIn1 = ExtractedFromVec;
6886 else if (VecIn1 != ExtractedFromVec) {
6887 if (!VecIn2.getNode())
6888 VecIn2 = ExtractedFromVec;
6889 else if (VecIn2 != ExtractedFromVec)
6890 // Quit if more than 2 vectors to shuffle
6891 return SDValue();
6892 }
6893
6894 if (ExtractedFromVec == VecIn1)
6895 Mask[i] = Idx;
6896 else if (ExtractedFromVec == VecIn2)
6897 Mask[i] = Idx + NumElems;
6898 }
6899
6900 if (!VecIn1.getNode())
6901 return SDValue();
6902
6903 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
6904 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
6905
6906 for (unsigned Idx : InsertIndices)
6907 NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
6908 DAG.getIntPtrConstant(Idx, DL));
6909
6910 return NV;
6911}
6912
6913static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
6914 assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6916, __PRETTY_FUNCTION__))
6915 Op.getScalarValueSizeInBits() == 1 &&((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6916, __PRETTY_FUNCTION__))
6916 "Can not convert non-constant vector")((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6916, __PRETTY_FUNCTION__))
;
6917 uint64_t Immediate = 0;
6918 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
6919 SDValue In = Op.getOperand(idx);
6920 if (!In.isUndef())
6921 Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
6922 }
6923 SDLoc dl(Op);
6924 MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
6925 return DAG.getConstant(Immediate, dl, VT);
6926}
6927// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
6928SDValue
6929X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
6930
6931 MVT VT = Op.getSimpleValueType();
6932 assert((VT.getVectorElementType() == MVT::i1) &&(((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"
) ? static_cast<void> (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6933, __PRETTY_FUNCTION__))
6933 "Unexpected type in LowerBUILD_VECTORvXi1!")(((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"
) ? static_cast<void> (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 6933, __PRETTY_FUNCTION__))
;
6934
6935 SDLoc dl(Op);
6936 if (ISD::isBuildVectorAllZeros(Op.getNode()))
6937 return DAG.getTargetConstant(0, dl, VT);
6938
6939 if (ISD::isBuildVectorAllOnes(Op.getNode()))
6940 return DAG.getTargetConstant(1, dl, VT);
6941
6942 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
6943 SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
6944 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
6945 return DAG.getBitcast(VT, Imm);
6946 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
6947 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
6948 DAG.getIntPtrConstant(0, dl));
6949 }
6950
6951 // Vector has one or more non-const elements
6952 uint64_t Immediate = 0;
6953 SmallVector<unsigned, 16> NonConstIdx;
6954 bool IsSplat = true;
6955 bool HasConstElts = false;
6956 int SplatIdx = -1;
6957 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
6958 SDValue In = Op.getOperand(idx);
6959 if (In.isUndef())
6960 continue;
6961 if (!isa<ConstantSDNode>(In))
6962 NonConstIdx.push_back(idx);
6963 else {
6964 Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
6965 HasConstElts = true;
6966 }
6967 if (SplatIdx < 0)
6968 SplatIdx = idx;
6969 else if (In != Op.getOperand(SplatIdx))
6970 IsSplat = false;
6971 }
6972
6973 // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
6974 if (IsSplat)
6975 return DAG.getNode(ISD::SELECT, dl, VT, Op.getOperand(SplatIdx),
6976 DAG.getConstant(1, dl, VT),
6977 DAG.getConstant(0, dl, VT));
6978
6979 // insert elements one by one
6980 SDValue DstVec;
6981 SDValue Imm;
6982 if (Immediate) {
6983 MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
6984 Imm = DAG.getConstant(Immediate, dl, ImmVT);
6985 }
6986 else if (HasConstElts)
6987 Imm = DAG.getConstant(0, dl, VT);
6988 else
6989 Imm = DAG.getUNDEF(VT);
6990 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
6991 DstVec = DAG.getBitcast(VT, Imm);
6992 else {
6993 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
6994 DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
6995 DAG.getIntPtrConstant(0, dl));
6996 }
6997
6998 for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
6999 unsigned InsertIdx = NonConstIdx[i];
7000 DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
7001 Op.getOperand(InsertIdx),
7002 DAG.getIntPtrConstant(InsertIdx, dl));
7003 }
7004 return DstVec;
7005}
7006
7007/// \brief Return true if \p N implements a horizontal binop and return the
7008/// operands for the horizontal binop into V0 and V1.
7009///
7010/// This is a helper function of LowerToHorizontalOp().
7011/// This function checks that the build_vector \p N in input implements a
7012/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
7013/// operation to match.
7014/// For example, if \p Opcode is equal to ISD::ADD, then this function
7015/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
7016/// is equal to ISD::SUB, then this function checks if this is a horizontal
7017/// arithmetic sub.
7018///
7019/// This function only analyzes elements of \p N whose indices are
7020/// in range [BaseIdx, LastIdx).
7021static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
7022 SelectionDAG &DAG,
7023 unsigned BaseIdx, unsigned LastIdx,
7024 SDValue &V0, SDValue &V1) {
7025 EVT VT = N->getValueType(0);
7026
7027 assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!")((BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"
) ? static_cast<void> (0) : __assert_fail ("BaseIdx * 2 <= LastIdx && \"Invalid Indices in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7027, __PRETTY_FUNCTION__))
;
7028 assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&((VT.isVector() && VT.getVectorNumElements() >= LastIdx
&& "Invalid Vector in input!") ? static_cast<void
> (0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7029, __PRETTY_FUNCTION__))
7029 "Invalid Vector in input!")((VT.isVector() && VT.getVectorNumElements() >= LastIdx
&& "Invalid Vector in input!") ? static_cast<void
> (0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7029, __PRETTY_FUNCTION__))
;
7030
7031 bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
7032 bool CanFold = true;
7033 unsigned ExpectedVExtractIdx = BaseIdx;
7034 unsigned NumElts = LastIdx - BaseIdx;
7035 V0 = DAG.getUNDEF(VT);
7036 V1 = DAG.getUNDEF(VT);
7037
7038 // Check if N implements a horizontal binop.
7039 for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
7040 SDValue Op = N->getOperand(i + BaseIdx);
7041
7042 // Skip UNDEFs.
7043 if (Op->isUndef()) {
7044 // Update the expected vector extract index.
7045 if (i * 2 == NumElts)
7046 ExpectedVExtractIdx = BaseIdx;
7047 ExpectedVExtractIdx += 2;
7048 continue;
7049 }
7050
7051 CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
7052
7053 if (!CanFold)
7054 break;
7055
7056 SDValue Op0 = Op.getOperand(0);
7057 SDValue Op1 = Op.getOperand(1);
7058
7059 // Try to match the following pattern:
7060 // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
7061 CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7062 Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7063 Op0.getOperand(0) == Op1.getOperand(0) &&
7064 isa<ConstantSDNode>(Op0.getOperand(1)) &&
7065 isa<ConstantSDNode>(Op1.getOperand(1)));
7066 if (!CanFold)
7067 break;
7068
7069 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7070 unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
7071
7072 if (i * 2 < NumElts) {
7073 if (V0.isUndef()) {
7074 V0 = Op0.getOperand(0);
7075 if (V0.getValueType() != VT)
7076 return false;
7077 }
7078 } else {
7079 if (V1.isUndef()) {
7080 V1 = Op0.getOperand(0);
7081 if (V1.getValueType() != VT)
7082 return false;
7083 }
7084 if (i * 2 == NumElts)
7085 ExpectedVExtractIdx = BaseIdx;
7086 }
7087
7088 SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
7089 if (I0 == ExpectedVExtractIdx)
7090 CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
7091 else if (IsCommutable && I1 == ExpectedVExtractIdx) {
7092 // Try to match the following dag sequence:
7093 // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
7094 CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
7095 } else
7096 CanFold = false;
7097
7098 ExpectedVExtractIdx += 2;
7099 }
7100
7101 return CanFold;
7102}
7103
7104/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
7105/// a concat_vector.
7106///
7107/// This is a helper function of LowerToHorizontalOp().
7108/// This function expects two 256-bit vectors called V0 and V1.
7109/// At first, each vector is split into two separate 128-bit vectors.
7110/// Then, the resulting 128-bit vectors are used to implement two
7111/// horizontal binary operations.
7112///
7113/// The kind of horizontal binary operation is defined by \p X86Opcode.
7114///
7115/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
7116/// the two new horizontal binop.
7117/// When Mode is set, the first horizontal binop dag node would take as input
7118/// the lower 128-bit of V0 and the upper 128-bit of V0. The second
7119/// horizontal binop dag node would take as input the lower 128-bit of V1
7120/// and the upper 128-bit of V1.
7121/// Example:
7122/// HADD V0_LO, V0_HI
7123/// HADD V1_LO, V1_HI
7124///
7125/// Otherwise, the first horizontal binop dag node takes as input the lower
7126/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
7127/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
7128/// Example:
7129/// HADD V0_LO, V1_LO
7130/// HADD V0_HI, V1_HI
7131///
7132/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
7133/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
7134/// the upper 128-bits of the result.
7135static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
7136 const SDLoc &DL, SelectionDAG &DAG,
7137 unsigned X86Opcode, bool Mode,
7138 bool isUndefLO, bool isUndefHI) {
7139 MVT VT = V0.getSimpleValueType();
7140 assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&((VT.is256BitVector() && VT == V1.getSimpleValueType(
) && "Invalid nodes in input!") ? static_cast<void
> (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7141, __PRETTY_FUNCTION__))
7141 "Invalid nodes in input!")((VT.is256BitVector() && VT == V1.getSimpleValueType(
) && "Invalid nodes in input!") ? static_cast<void
> (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7141, __PRETTY_FUNCTION__))
;
7142
7143 unsigned NumElts = VT.getVectorNumElements();
7144 SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
7145 SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
7146 SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
7147 SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
7148 MVT NewVT = V0_LO.getSimpleValueType();
7149
7150 SDValue LO = DAG.getUNDEF(NewVT);
7151 SDValue HI = DAG.getUNDEF(NewVT);
7152
7153 if (Mode) {
7154 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7155 if (!isUndefLO && !V0->isUndef())
7156 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
7157 if (!isUndefHI && !V1->isUndef())
7158 HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
7159 } else {
7160 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7161 if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
7162 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
7163
7164 if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
7165 HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
7166 }
7167
7168 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
7169}
7170
7171/// Returns true iff \p BV builds a vector with the result equivalent to
7172/// the result of ADDSUB operation.
7173/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
7174/// are written to the parameters \p Opnd0 and \p Opnd1.
7175static bool isAddSub(const BuildVectorSDNode *BV,
7176 const X86Subtarget &Subtarget, SelectionDAG &DAG,
7177 SDValue &Opnd0, SDValue &Opnd1) {
7178
7179 MVT VT = BV->getSimpleValueType(0);
7180 if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
7181 (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
7182 (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
7183 return false;
7184
7185 unsigned NumElts = VT.getVectorNumElements();
7186 SDValue InVec0 = DAG.getUNDEF(VT);
7187 SDValue InVec1 = DAG.getUNDEF(VT);
7188
7189 // Odd-numbered elements in the input build vector are obtained from
7190 // adding two integer/float elements.
7191 // Even-numbered elements in the input build vector are obtained from
7192 // subtracting two integer/float elements.
7193 unsigned ExpectedOpcode = ISD::FSUB;
7194 unsigned NextExpectedOpcode = ISD::FADD;
7195 bool AddFound = false;
7196 bool SubFound = false;
7197
7198 for (unsigned i = 0, e = NumElts; i != e; ++i) {
7199 SDValue Op = BV->getOperand(i);
7200
7201 // Skip 'undef' values.
7202 unsigned Opcode = Op.getOpcode();
7203 if (Opcode == ISD::UNDEF) {
7204 std::swap(ExpectedOpcode, NextExpectedOpcode);
7205 continue;
7206 }
7207
7208 // Early exit if we found an unexpected opcode.
7209 if (Opcode != ExpectedOpcode)
7210 return false;
7211
7212 SDValue Op0 = Op.getOperand(0);
7213 SDValue Op1 = Op.getOperand(1);
7214
7215 // Try to match the following pattern:
7216 // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
7217 // Early exit if we cannot match that sequence.
7218 if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7219 Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7220 !isa<ConstantSDNode>(Op0.getOperand(1)) ||
7221 !isa<ConstantSDNode>(Op1.getOperand(1)) ||
7222 Op0.getOperand(1) != Op1.getOperand(1))
7223 return false;
7224
7225 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7226 if (I0 != i)
7227 return false;
7228
7229 // We found a valid add/sub node. Update the information accordingly.
7230 if (i & 1)
7231 AddFound = true;
7232 else
7233 SubFound = true;
7234
7235 // Update InVec0 and InVec1.
7236 if (InVec0.isUndef()) {
7237 InVec0 = Op0.getOperand(0);
7238 if (InVec0.getSimpleValueType() != VT)
7239 return false;
7240 }
7241 if (InVec1.isUndef()) {
7242 InVec1 = Op1.getOperand(0);
7243 if (InVec1.getSimpleValueType() != VT)
7244 return false;
7245 }
7246
7247 // Make sure that operands in input to each add/sub node always
7248 // come from a same pair of vectors.
7249 if (InVec0 != Op0.getOperand(0)) {
7250 if (ExpectedOpcode == ISD::FSUB)
7251 return false;
7252
7253 // FADD is commutable. Try to commute the operands
7254 // and then test again.
7255 std::swap(Op0, Op1);
7256 if (InVec0 != Op0.getOperand(0))
7257 return false;
7258 }
7259
7260 if (InVec1 != Op1.getOperand(0))
7261 return false;
7262
7263 // Update the pair of expected opcodes.
7264 std::swap(ExpectedOpcode, NextExpectedOpcode);
7265 }
7266
7267 // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
7268 if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
7269 return false;
7270
7271 Opnd0 = InVec0;
7272 Opnd1 = InVec1;
7273 return true;
7274}
7275
7276/// Returns true if is possible to fold MUL and an idiom that has already been
7277/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
7278/// If (and only if) true is returned, the operands of FMADDSUB are written to
7279/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
7280///
7281/// Prior to calling this function it should be known that there is some
7282/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
7283/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
7284/// before replacement of such SDNode with ADDSUB operation. Thus the number
7285/// of \p Opnd0 uses is expected to be equal to 2.
7286/// For example, this function may be called for the following IR:
7287/// %AB = fmul fast <2 x double> %A, %B
7288/// %Sub = fsub fast <2 x double> %AB, %C
7289/// %Add = fadd fast <2 x double> %AB, %C
7290/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
7291/// <2 x i32> <i32 0, i32 3>
7292/// There is a def for %Addsub here, which potentially can be replaced by
7293/// X86ISD::ADDSUB operation:
7294/// %Addsub = X86ISD::ADDSUB %AB, %C
7295/// and such ADDSUB can further be replaced with FMADDSUB:
7296/// %Addsub = FMADDSUB %A, %B, %C.
7297///
7298/// The main reason why this method is called before the replacement of the
7299/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
7300/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
7301/// FMADDSUB is.
7302static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
7303 SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
7304 if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
7305 !Subtarget.hasAnyFMA())
7306 return false;
7307
7308 // FIXME: These checks must match the similar ones in
7309 // DAGCombiner::visitFADDForFMACombine. It would be good to have one
7310 // function that would answer if it is Ok to fuse MUL + ADD to FMADD
7311 // or MUL + ADDSUB to FMADDSUB.
7312 const TargetOptions &Options = DAG.getTarget().Options;
7313 bool AllowFusion =
7314 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
7315 if (!AllowFusion)
7316 return false;
7317
7318 Opnd2 = Opnd1;
7319 Opnd1 = Opnd0.getOperand(1);
7320 Opnd0 = Opnd0.getOperand(0);
7321
7322 return true;
7323}
7324
7325/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation
7326/// accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB node.
7327static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
7328 const X86Subtarget &Subtarget,
7329 SelectionDAG &DAG) {
7330 SDValue Opnd0, Opnd1;
7331 if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
7332 return SDValue();
7333
7334 MVT VT = BV->getSimpleValueType(0);
7335 SDLoc DL(BV);
7336
7337 // Try to generate X86ISD::FMADDSUB node here.
7338 SDValue Opnd2;
7339 if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
7340 return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
7341
7342 // Do not generate X86ISD::ADDSUB node for 512-bit types even though
7343 // the ADDSUB idiom has been successfully recognized. There are no known
7344 // X86 targets with 512-bit ADDSUB instructions!
7345 // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
7346 // recognition.
7347 if (VT.is512BitVector())
7348 return SDValue();
7349
7350 return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
7351}
7352
7353/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
7354static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
7355 const X86Subtarget &Subtarget,
7356 SelectionDAG &DAG) {
7357 MVT VT = BV->getSimpleValueType(0);
7358 unsigned NumElts = VT.getVectorNumElements();
7359 unsigned NumUndefsLO = 0;
7360 unsigned NumUndefsHI = 0;
7361 unsigned Half = NumElts/2;
7362
7363 // Count the number of UNDEF operands in the build_vector in input.
7364 for (unsigned i = 0, e = Half; i != e; ++i)
7365 if (BV->getOperand(i)->isUndef())
7366 NumUndefsLO++;
7367
7368 for (unsigned i = Half, e = NumElts; i != e; ++i)
7369 if (BV->getOperand(i)->isUndef())
7370 NumUndefsHI++;
7371
7372 // Early exit if this is either a build_vector of all UNDEFs or all the
7373 // operands but one are UNDEF.
7374 if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
7375 return SDValue();
7376
7377 SDLoc DL(BV);
7378 SDValue InVec0, InVec1;
7379 if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
7380 // Try to match an SSE3 float HADD/HSUB.
7381 if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
7382 return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
7383
7384 if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
7385 return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
7386 } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
7387 // Try to match an SSSE3 integer HADD/HSUB.
7388 if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
7389 return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
7390
7391 if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
7392 return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
7393 }
7394
7395 if (!Subtarget.hasAVX())
7396 return SDValue();
7397
7398 if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
7399 // Try to match an AVX horizontal add/sub of packed single/double
7400 // precision floating point values from 256-bit vectors.
7401 SDValue InVec2, InVec3;
7402 if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
7403 isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
7404 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7405 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7406 return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
7407
7408 if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
7409 isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
7410 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7411 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7412 return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
7413 } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
7414 // Try to match an AVX2 horizontal add/sub of signed integers.
7415 SDValue InVec2, InVec3;
7416 unsigned X86Opcode;
7417 bool CanFold = true;
7418
7419 if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
7420 isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
7421 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7422 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7423 X86Opcode = X86ISD::HADD;
7424 else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
7425 isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
7426 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7427 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7428 X86Opcode = X86ISD::HSUB;
7429 else
7430 CanFold = false;
7431
7432 if (CanFold) {
7433 // Fold this build_vector into a single horizontal add/sub.
7434 // Do this only if the target has AVX2.
7435 if (Subtarget.hasAVX2())
7436 return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
7437
7438 // Do not try to expand this build_vector into a pair of horizontal
7439 // add/sub if we can emit a pair of scalar add/sub.
7440 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
7441 return SDValue();
7442
7443 // Convert this build_vector into a pair of horizontal binop followed by
7444 // a concat vector.
7445 bool isUndefLO = NumUndefsLO == Half;
7446 bool isUndefHI = NumUndefsHI == Half;
7447 return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
7448 isUndefLO, isUndefHI);
7449 }
7450 }
7451
7452 if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
7453 VT == MVT::v16i16) && Subtarget.hasAVX()) {
7454 unsigned X86Opcode;
7455 if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
7456 X86Opcode = X86ISD::HADD;
7457 else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
7458 X86Opcode = X86ISD::HSUB;
7459 else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
7460 X86Opcode = X86ISD::FHADD;
7461 else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
7462 X86Opcode = X86ISD::FHSUB;
7463 else
7464 return SDValue();
7465
7466 // Don't try to expand this build_vector into a pair of horizontal add/sub
7467 // if we can simply emit a pair of scalar add/sub.
7468 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
7469 return SDValue();
7470
7471 // Convert this build_vector into two horizontal add/sub followed by
7472 // a concat vector.
7473 bool isUndefLO = NumUndefsLO == Half;
7474 bool isUndefHI = NumUndefsHI == Half;
7475 return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
7476 isUndefLO, isUndefHI);
7477 }
7478
7479 return SDValue();
7480}
7481
7482/// If a BUILD_VECTOR's source elements all apply the same bit operation and
7483/// one of their operands is constant, lower to a pair of BUILD_VECTOR and
7484/// just apply the bit to the vectors.
7485/// NOTE: Its not in our interest to start make a general purpose vectorizer
7486/// from this, but enough scalar bit operations are created from the later
7487/// legalization + scalarization stages to need basic support.
7488static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
7489 SelectionDAG &DAG) {
7490 SDLoc DL(Op);
7491 MVT VT = Op->getSimpleValueType(0);
7492 unsigned NumElems = VT.getVectorNumElements();
7493 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7494
7495 // Check that all elements have the same opcode.
7496 // TODO: Should we allow UNDEFS and if so how many?
7497 unsigned Opcode = Op->getOperand(0).getOpcode();
7498 for (unsigned i = 1; i < NumElems; ++i)
7499 if (Opcode != Op->getOperand(i).getOpcode())
7500 return SDValue();
7501
7502 // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
7503 switch (Opcode) {
7504 default:
7505 return SDValue();
7506 case ISD::AND:
7507 case ISD::XOR:
7508 case ISD::OR:
7509 if (!TLI.isOperationLegalOrPromote(Opcode, VT))
7510 return SDValue();
7511 break;
7512 }
7513
7514 SmallVector<SDValue, 4> LHSElts, RHSElts;
7515 for (SDValue Elt : Op->ops()) {
7516 SDValue LHS = Elt.getOperand(0);
7517 SDValue RHS = Elt.getOperand(1);
7518
7519 // We expect the canonicalized RHS operand to be the constant.
7520 if (!isa<ConstantSDNode>(RHS))
7521 return SDValue();
7522 LHSElts.push_back(LHS);
7523 RHSElts.push_back(RHS);
7524 }
7525
7526 SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
7527 SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
7528 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7529}
7530
7531/// Create a vector constant without a load. SSE/AVX provide the bare minimum
7532/// functionality to do this, so it's all zeros, all ones, or some derivation
7533/// that is cheap to calculate.
7534static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
7535 const X86Subtarget &Subtarget) {
7536 SDLoc DL(Op);
7537 MVT VT = Op.getSimpleValueType();
7538
7539 // Vectors containing all zeros can be matched by pxor and xorps.
7540 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
7541 // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
7542 // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
7543 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
7544 return Op;
7545
7546 return getZeroVector(VT, Subtarget, DAG, DL);
7547 }
7548
7549 // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
7550 // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
7551 // vpcmpeqd on 256-bit vectors.
7552 if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
7553 if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
7554 (VT == MVT::v8i32 && Subtarget.hasInt256()))
7555 return Op;
7556
7557 return getOnesVector(VT, Subtarget, DAG, DL);
7558 }
7559
7560 return SDValue();
7561}
7562
7563SDValue
7564X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
7565 SDLoc dl(Op);
7566
7567 MVT VT = Op.getSimpleValueType();
7568 MVT ExtVT = VT.getVectorElementType();
7569 unsigned NumElems = Op.getNumOperands();
7570
7571 // Generate vectors for predicate vectors.
7572 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
7573 return LowerBUILD_VECTORvXi1(Op, DAG);
7574
7575 if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
7576 return VectorConstant;
7577
7578 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
7579 if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
7580 return AddSub;
7581 if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
7582 return HorizontalOp;
7583 if (SDValue Broadcast = LowerVectorBroadcast(BV, Subtarget, DAG))
7584 return Broadcast;
7585 if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
7586 return BitOp;
7587
7588 unsigned EVTBits = ExtVT.getSizeInBits();
7589
7590 unsigned NumZero = 0;
7591 unsigned NumNonZero = 0;
7592 uint64_t NonZeros = 0;
7593 bool IsAllConstants = true;
7594 SmallSet<SDValue, 8> Values;
7595 for (unsigned i = 0; i < NumElems; ++i) {
7596 SDValue Elt = Op.getOperand(i);
7597 if (Elt.isUndef())
7598 continue;
7599 Values.insert(Elt);
7600 if (Elt.getOpcode() != ISD::Constant &&
7601 Elt.getOpcode() != ISD::ConstantFP)
7602 IsAllConstants = false;
7603 if (X86::isZeroNode(Elt))
7604 NumZero++;
7605 else {
7606 assert(i < sizeof(NonZeros) * 8)((i < sizeof(NonZeros) * 8) ? static_cast<void> (0) :
__assert_fail ("i < sizeof(NonZeros) * 8", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7606, __PRETTY_FUNCTION__))
; // Make sure the shift is within range.
7607 NonZeros |= ((uint64_t)1 << i);
7608 NumNonZero++;
7609 }
7610 }
7611
7612 // All undef vector. Return an UNDEF. All zero vectors were handled above.
7613 if (NumNonZero == 0)
7614 return DAG.getUNDEF(VT);
7615
7616 // Special case for single non-zero, non-undef, element.
7617 if (NumNonZero == 1) {
7618 unsigned Idx = countTrailingZeros(NonZeros);
7619 SDValue Item = Op.getOperand(Idx);
7620
7621 // If this is an insertion of an i64 value on x86-32, and if the top bits of
7622 // the value are obviously zero, truncate the value to i32 and do the
7623 // insertion that way. Only do this if the value is non-constant or if the
7624 // value is a constant being inserted into element 0. It is cheaper to do
7625 // a constant pool load than it is to do a movd + shuffle.
7626 if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
7627 (!IsAllConstants || Idx == 0)) {
7628 if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) {
7629 // Handle SSE only.
7630 assert(VT == MVT::v2i64 && "Expected an SSE value type!")((VT == MVT::v2i64 && "Expected an SSE value type!") ?
static_cast<void> (0) : __assert_fail ("VT == MVT::v2i64 && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7630, __PRETTY_FUNCTION__))
;
7631 MVT VecVT = MVT::v4i32;
7632
7633 // Truncate the value (which may itself be a constant) to i32, and
7634 // convert it to a vector with movd (S2V+shuffle to zero extend).
7635 Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
7636 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
7637 return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef(
7638 Item, Idx * 2, true, Subtarget, DAG));
7639 }
7640 }
7641
7642 // If we have a constant or non-constant insertion into the low element of
7643 // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
7644 // the rest of the elements. This will be matched as movd/movq/movss/movsd
7645 // depending on what the source datatype is.
7646 if (Idx == 0) {
7647 if (NumZero == 0)
7648 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
7649
7650 if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
7651 (ExtVT == MVT::i64 && Subtarget.is64Bit())) {
7652 assert((VT.is128BitVector() || VT.is256BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected an SSE value type!") ? static_cast<
void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7654, __PRETTY_FUNCTION__))
7653 VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected an SSE value type!") ? static_cast<
void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7654, __PRETTY_FUNCTION__))
7654 "Expected an SSE value type!")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected an SSE value type!") ? static_cast<
void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7654, __PRETTY_FUNCTION__))
;
7655 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
7656 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
7657 return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
7658 }
7659
7660 // We can't directly insert an i8 or i16 into a vector, so zero extend
7661 // it to i32 first.
7662 if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
7663 Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
7664 if (VT.getSizeInBits() >= 256) {
7665 MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
7666 if (Subtarget.hasAVX()) {
7667 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
7668 Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
7669 } else {
7670 // Without AVX, we need to extend to a 128-bit vector and then
7671 // insert into the 256-bit vector.
7672 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
7673 SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl);
7674 Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl);
7675 }
7676 } else {
7677 assert(VT.is128BitVector() && "Expected an SSE value type!")((VT.is128BitVector() && "Expected an SSE value type!"
) ? static_cast<void> (0) : __assert_fail ("VT.is128BitVector() && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/X86/X86ISelLowering.cpp"
, 7677, __PRETTY_FUNCTION__))
;
7678 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
7679 Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
7680 }
7681 return DAG.getBitcast(VT, Item);
7682 }
7683 }
7684
7685 // Is it a vector logical left shift?
7686 if (NumElems == 2 && Idx == 1 &&
7687 X86::isZeroNode(Op.getOperand(0)) &&
7688 !X86::isZeroNode(Op.getOperand(1))) {
7689 unsigned NumBits = VT.getSizeInBits();
7690 return getVShift(true, VT,
7691 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
7692 VT, Op.getOperand(1)),
7693 NumBits/2, DAG, *this, dl);
7694 }
7695
7696 if (IsAllConstants) // Otherwise, it's better to do a constpool load.
7697 return SDValue();
7698
7699 // Otherwise, if this is a vector with i32 or f32 elements, and the element
7700 // is a non-constant being inserted into an element other than the low one,
7701 // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
7702 // movd/movss) to move this into the low element, then shuffle it into
7703 // place.
7704 if (EVTBits == 32) {
7705 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
7706 return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
7707 }
7708 }
7709
7710 // Splat is obviously ok. Let legalizer expand it to a shuffle.
7711 if (Values.size() == 1) {
7712 if (EVTBits == 32) {
7713 // Instead of a shuffle like this:
7714 // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
7715 // Check if it's possible to issue this instead.
7716 // shuffle (vload ptr)), undef, <1, 1, 1, 1>
7717 unsigned Idx = countTrailingZeros(NonZeros);
7718 SDValue Item = Op.getOperand(Idx);
7719 if (Op.getNode()->isOnlyUserOf(Item.getNode()))
7720