Bug Summary

File: lib/Target/X86/X86ISelLowering.cpp
Warning: line 27007, column 5
Value stored to 'AllowIntDomain' is never read

Annotated Source Code

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "X86ISelLowering.h"
16#include "Utils/X86ShuffleDecode.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86ShuffleDecodeConstantPool.h"
23#include "X86TargetMachine.h"
24#include "X86TargetObjectFile.h"
25#include "llvm/ADT/SmallBitVector.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringExtras.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/CodeGen/IntrinsicLowering.h"
32#include "llvm/CodeGen/MachineFrameInfo.h"
33#include "llvm/CodeGen/MachineFunction.h"
34#include "llvm/CodeGen/MachineInstrBuilder.h"
35#include "llvm/CodeGen/MachineJumpTableInfo.h"
36#include "llvm/CodeGen/MachineModuleInfo.h"
37#include "llvm/CodeGen/MachineRegisterInfo.h"
38#include "llvm/CodeGen/WinEHFuncInfo.h"
39#include "llvm/IR/CallSite.h"
40#include "llvm/IR/CallingConv.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DerivedTypes.h"
43#include "llvm/IR/DiagnosticInfo.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalAlias.h"
46#include "llvm/IR/GlobalVariable.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/Intrinsics.h"
49#include "llvm/MC/MCAsmInfo.h"
50#include "llvm/MC/MCContext.h"
51#include "llvm/MC/MCExpr.h"
52#include "llvm/MC/MCSymbol.h"
53#include "llvm/Support/CommandLine.h"
54#include "llvm/Support/Debug.h"
55#include "llvm/Support/ErrorHandling.h"
56#include "llvm/Support/KnownBits.h"
57#include "llvm/Support/MathExtras.h"
58#include "llvm/Target/TargetLowering.h"
59#include "llvm/Target/TargetOptions.h"
60#include <algorithm>
61#include <bitset>
62#include <cctype>
63#include <numeric>
64using namespace llvm;
65
66#define DEBUG_TYPE"x86-isel" "x86-isel"
67
68STATISTIC(NumTailCalls, "Number of tail calls")static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls"
, "Number of tail calls", {0}, false}
;
69
70static cl::opt<bool> ExperimentalVectorWideningLegalization(
71 "x86-experimental-vector-widening-legalization", cl::init(false),
72 cl::desc("Enable an experimental vector type legalization through widening "
73 "rather than promotion."),
74 cl::Hidden);
75
76static cl::opt<int> ExperimentalPrefLoopAlignment(
77 "x86-experimental-pref-loop-alignment", cl::init(4),
78 cl::desc("Sets the preferable loop alignment for experiments "
79 "(the last x86-experimental-pref-loop-alignment bits"
80 " of the loop header PC will be 0)."),
81 cl::Hidden);
82
83/// Call this when the user attempts to do something unsupported, like
84/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
85/// report_fatal_error, so calling code should attempt to recover without
86/// crashing.
87static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
88 const char *Msg) {
89 MachineFunction &MF = DAG.getMachineFunction();
90 DAG.getContext()->diagnose(
91 DiagnosticInfoUnsupported(*MF.getFunction(), Msg, dl.getDebugLoc()));
92}
93
94X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
95 const X86Subtarget &STI)
96 : TargetLowering(TM), Subtarget(STI) {
97 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
98 X86ScalarSSEf64 = Subtarget.hasSSE2();
99 X86ScalarSSEf32 = Subtarget.hasSSE1();
100 MVT PtrVT = MVT::getIntegerVT(8 * TM.getPointerSize());
101
102 // Set up the TargetLowering object.
103
104 // X86 is weird. It always uses i8 for shift amounts and setcc results.
105 setBooleanContents(ZeroOrOneBooleanContent);
106 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
107 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
108
109 // For 64-bit, since we have so many registers, use the ILP scheduler.
110 // For 32-bit, use the register pressure specific scheduling.
111 // For Atom, always use ILP scheduling.
112 if (Subtarget.isAtom())
113 setSchedulingPreference(Sched::ILP);
114 else if (Subtarget.is64Bit())
115 setSchedulingPreference(Sched::ILP);
116 else
117 setSchedulingPreference(Sched::RegPressure);
118 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
119 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
120
121 // Bypass expensive divides and use cheaper ones.
122 if (TM.getOptLevel() >= CodeGenOpt::Default) {
123 if (Subtarget.hasSlowDivide32())
124 addBypassSlowDiv(32, 8);
125 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
126 addBypassSlowDiv(64, 32);
127 }
128
129 if (Subtarget.isTargetKnownWindowsMSVC() ||
130 Subtarget.isTargetWindowsItanium()) {
131 // Setup Windows compiler runtime calls.
132 setLibcallName(RTLIB::SDIV_I64, "_alldiv");
133 setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
134 setLibcallName(RTLIB::SREM_I64, "_allrem");
135 setLibcallName(RTLIB::UREM_I64, "_aullrem");
136 setLibcallName(RTLIB::MUL_I64, "_allmul");
137 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::X86_StdCall);
138 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::X86_StdCall);
139 setLibcallCallingConv(RTLIB::SREM_I64, CallingConv::X86_StdCall);
140 setLibcallCallingConv(RTLIB::UREM_I64, CallingConv::X86_StdCall);
141 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::X86_StdCall);
142 }
143
144 if (Subtarget.isTargetDarwin()) {
145 // Darwin should use _setjmp/_longjmp instead of setjmp/longjmp.
146 setUseUnderscoreSetJmp(false);
147 setUseUnderscoreLongJmp(false);
148 } else if (Subtarget.isTargetWindowsGNU()) {
149 // MS runtime is weird: it exports _setjmp, but longjmp!
150 setUseUnderscoreSetJmp(true);
151 setUseUnderscoreLongJmp(false);
152 } else {
153 setUseUnderscoreSetJmp(true);
154 setUseUnderscoreLongJmp(true);
155 }
156
157 // Set up the register classes.
158 addRegisterClass(MVT::i8, &X86::GR8RegClass);
159 addRegisterClass(MVT::i16, &X86::GR16RegClass);
160 addRegisterClass(MVT::i32, &X86::GR32RegClass);
161 if (Subtarget.is64Bit())
162 addRegisterClass(MVT::i64, &X86::GR64RegClass);
163
164 for (MVT VT : MVT::integer_valuetypes())
165 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
166
167 // We don't accept any truncstore of integer registers.
168 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
169 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
170 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
171 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
172 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
173 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
174
175 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
176
177 // SETOEQ and SETUNE require checking two conditions.
178 setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand);
179 setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand);
180 setCondCodeAction(ISD::SETOEQ, MVT::f80, Expand);
181 setCondCodeAction(ISD::SETUNE, MVT::f32, Expand);
182 setCondCodeAction(ISD::SETUNE, MVT::f64, Expand);
183 setCondCodeAction(ISD::SETUNE, MVT::f80, Expand);
184
185 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
186 // operation.
187 setOperationAction(ISD::UINT_TO_FP , MVT::i1 , Promote);
188 setOperationAction(ISD::UINT_TO_FP , MVT::i8 , Promote);
189 setOperationAction(ISD::UINT_TO_FP , MVT::i16 , Promote);
190
191 if (Subtarget.is64Bit()) {
192 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512())
193 // f32/f64 are legal, f80 is custom.
194 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
195 else
196 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Promote);
197 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
198 } else if (!Subtarget.useSoftFloat()) {
199 // We have an algorithm for SSE2->double, and we turn this into a
200 // 64-bit FILD followed by conditional FADD for other targets.
201 setOperationAction(ISD::UINT_TO_FP , MVT::i64 , Custom);
202 // We have an algorithm for SSE2, and we turn this into a 64-bit
203 // FILD or VCVTUSI2SS/SD for other targets.
204 setOperationAction(ISD::UINT_TO_FP , MVT::i32 , Custom);
205 }
206
207 // Promote i1/i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
208 // this operation.
209 setOperationAction(ISD::SINT_TO_FP , MVT::i1 , Promote);
210 setOperationAction(ISD::SINT_TO_FP , MVT::i8 , Promote);
211
212 if (!Subtarget.useSoftFloat()) {
213 // SSE has no i16 to fp conversion, only i32.
214 if (X86ScalarSSEf32) {
215 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
216 // f32 and f64 cases are Legal, f80 case is not
217 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
218 } else {
219 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Custom);
220 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Custom);
221 }
222 } else {
223 setOperationAction(ISD::SINT_TO_FP , MVT::i16 , Promote);
224 setOperationAction(ISD::SINT_TO_FP , MVT::i32 , Promote);
225 }
226
227 // Promote i1/i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
228 // this operation.
229 setOperationAction(ISD::FP_TO_SINT , MVT::i1 , Promote);
230 setOperationAction(ISD::FP_TO_SINT , MVT::i8 , Promote);
231
232 if (!Subtarget.useSoftFloat()) {
233 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
234 // are Legal, f80 is custom lowered.
235 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Custom);
236 setOperationAction(ISD::SINT_TO_FP , MVT::i64 , Custom);
237
238 if (X86ScalarSSEf32) {
239 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
240 // f32 and f64 cases are Legal, f80 case is not
241 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
242 } else {
243 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Custom);
244 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Custom);
245 }
246 } else {
247 setOperationAction(ISD::FP_TO_SINT , MVT::i16 , Promote);
248 setOperationAction(ISD::FP_TO_SINT , MVT::i32 , Expand);
249 setOperationAction(ISD::FP_TO_SINT , MVT::i64 , Expand);
250 }
251
252 // Handle FP_TO_UINT by promoting the destination to a larger signed
253 // conversion.
254 setOperationAction(ISD::FP_TO_UINT , MVT::i1 , Promote);
255 setOperationAction(ISD::FP_TO_UINT , MVT::i8 , Promote);
256 setOperationAction(ISD::FP_TO_UINT , MVT::i16 , Promote);
257
258 if (Subtarget.is64Bit()) {
259 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
260 // FP_TO_UINT-i32/i64 is legal for f32/f64, but custom for f80.
261 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
262 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
263 } else {
264 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Promote);
265 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Expand);
266 }
267 } else if (!Subtarget.useSoftFloat()) {
268 // Since AVX is a superset of SSE3, only check for SSE here.
269 if (Subtarget.hasSSE1() && !Subtarget.hasSSE3())
270 // Expand FP_TO_UINT into a select.
271 // FIXME: We would like to use a Custom expander here eventually to do
272 // the optimal thing for SSE vs. the default expansion in the legalizer.
273 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Expand);
274 else
275 // With AVX512 we can use vcvts[ds]2usi for f32/f64->i32, f80 is custom.
276 // With SSE3 we can use fisttpll to convert to a signed i64; without
277 // SSE, we're stuck with a fistpll.
278 setOperationAction(ISD::FP_TO_UINT , MVT::i32 , Custom);
279
280 setOperationAction(ISD::FP_TO_UINT , MVT::i64 , Custom);
281 }
282
283 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
284 if (!X86ScalarSSEf64) {
285 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
286 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
287 if (Subtarget.is64Bit()) {
288 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
289 // Without SSE, i64->f64 goes through memory.
290 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
291 }
292 } else if (!Subtarget.is64Bit())
293 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
294
295 // Scalar integer divide and remainder are lowered to use operations that
296 // produce two results, to match the available instructions. This exposes
297 // the two-result form to trivial CSE, which is able to combine x/y and x%y
298 // into a single instruction.
299 //
300 // Scalar integer multiply-high is also lowered to use two-result
301 // operations, to match the available instructions. However, plain multiply
302 // (low) operations are left as Legal, as there are single-result
303 // instructions for this in x86. Using the two-result multiply instructions
304 // when both high and low results are needed must be arranged by dagcombine.
305 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
306 setOperationAction(ISD::MULHS, VT, Expand);
307 setOperationAction(ISD::MULHU, VT, Expand);
308 setOperationAction(ISD::SDIV, VT, Expand);
309 setOperationAction(ISD::UDIV, VT, Expand);
310 setOperationAction(ISD::SREM, VT, Expand);
311 setOperationAction(ISD::UREM, VT, Expand);
312 }
313
314 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
315 if (VT == MVT::i64 && !Subtarget.is64Bit())
316 continue;
317 // Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
318 setOperationAction(ISD::ADDC, VT, Custom);
319 setOperationAction(ISD::ADDE, VT, Custom);
320 setOperationAction(ISD::SUBC, VT, Custom);
321 setOperationAction(ISD::SUBE, VT, Custom);
322 }
323
324 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
325 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
326 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
327 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
328 setOperationAction(ISD::BR_CC, VT, Expand);
329 setOperationAction(ISD::SELECT_CC, VT, Expand);
330 }
331 if (Subtarget.is64Bit())
332 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
333 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
334 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
335 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
336 setOperationAction(ISD::FP_ROUND_INREG , MVT::f32 , Expand);
337
338 setOperationAction(ISD::FREM , MVT::f32 , Expand);
339 setOperationAction(ISD::FREM , MVT::f64 , Expand);
340 setOperationAction(ISD::FREM , MVT::f80 , Expand);
341 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
342
343 // Promote the i8 variants and force them on up to i32 which has a shorter
344 // encoding.
345 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
346 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
347 if (!Subtarget.hasBMI()) {
348 setOperationAction(ISD::CTTZ , MVT::i16 , Custom);
349 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
350 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
351 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
352 if (Subtarget.is64Bit()) {
353 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
354 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
355 }
356 }
357
358 if (Subtarget.hasLZCNT()) {
359 // When promoting the i8 variants, force them to i32 for a shorter
360 // encoding.
361 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
362 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
363 } else {
364 setOperationAction(ISD::CTLZ , MVT::i8 , Custom);
365 setOperationAction(ISD::CTLZ , MVT::i16 , Custom);
366 setOperationAction(ISD::CTLZ , MVT::i32 , Custom);
367 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , Custom);
368 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16 , Custom);
369 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32 , Custom);
370 if (Subtarget.is64Bit()) {
371 setOperationAction(ISD::CTLZ , MVT::i64 , Custom);
372 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
373 }
374 }
375
376 // Special handling for half-precision floating point conversions.
377 // If we don't have F16C support, then lower half float conversions
378 // into library calls.
379 if (Subtarget.useSoftFloat() ||
380 (!Subtarget.hasF16C() && !Subtarget.hasAVX512())) {
381 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
382 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
383 }
384
385 // There's never any support for operations beyond MVT::f32.
386 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
387 setOperationAction(ISD::FP16_TO_FP, MVT::f80, Expand);
388 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
389 setOperationAction(ISD::FP_TO_FP16, MVT::f80, Expand);
390
391 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
392 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
393 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
394 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
395 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
396 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
397
398 if (Subtarget.hasPOPCNT()) {
399 setOperationAction(ISD::CTPOP , MVT::i8 , Promote);
400 } else {
401 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
402 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
403 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
404 if (Subtarget.is64Bit())
405 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
406 }
407
408 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
409
410 if (!Subtarget.hasMOVBE())
411 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
412
413 // These should be promoted to a larger select which is supported.
414 setOperationAction(ISD::SELECT , MVT::i1 , Promote);
415 // X86 wants to expand cmov itself.
416 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
417 setOperationAction(ISD::SELECT, VT, Custom);
418 setOperationAction(ISD::SETCC, VT, Custom);
419 }
420 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
421 if (VT == MVT::i64 && !Subtarget.is64Bit())
422 continue;
423 setOperationAction(ISD::SELECT, VT, Custom);
424 setOperationAction(ISD::SETCC, VT, Custom);
425 setOperationAction(ISD::SETCCE, VT, Custom);
426 }
427 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
428 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
429 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
430 // support continuation, user-level threading, and etc.. As a result, no
431 // other SjLj exception interfaces are implemented and please don't build
432 // your own exception handling based on them.
433 // LLVM/Clang supports zero-cost DWARF exception handling.
434 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
435 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
436 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
437 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
438 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
439
440 // Darwin ABI issue.
441 for (auto VT : { MVT::i32, MVT::i64 }) {
442 if (VT == MVT::i64 && !Subtarget.is64Bit())
443 continue;
444 setOperationAction(ISD::ConstantPool , VT, Custom);
445 setOperationAction(ISD::JumpTable , VT, Custom);
446 setOperationAction(ISD::GlobalAddress , VT, Custom);
447 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
448 setOperationAction(ISD::ExternalSymbol , VT, Custom);
449 setOperationAction(ISD::BlockAddress , VT, Custom);
450 }
451
452 // 64-bit shl, sra, srl (iff 32-bit x86)
453 for (auto VT : { MVT::i32, MVT::i64 }) {
454 if (VT == MVT::i64 && !Subtarget.is64Bit())
455 continue;
456 setOperationAction(ISD::SHL_PARTS, VT, Custom);
457 setOperationAction(ISD::SRA_PARTS, VT, Custom);
458 setOperationAction(ISD::SRL_PARTS, VT, Custom);
459 }
460
461 if (Subtarget.hasSSE1())
462 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
463
464 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
465
466 // Expand certain atomics
467 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
468 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
469 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
470 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
471 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
474 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
475 }
476
477 if (Subtarget.hasCmpxchg16b()) {
478 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
479 }
480
481 // FIXME - use subtarget debug flags
482 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
483 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
484 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
485 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
486 }
487
488 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
489 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
490
491 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
492 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
493
494 setOperationAction(ISD::TRAP, MVT::Other, Legal);
495 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
496
497 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
498 setOperationAction(ISD::VASTART , MVT::Other, Custom);
499 setOperationAction(ISD::VAEND , MVT::Other, Expand);
500 bool Is64Bit = Subtarget.is64Bit();
501 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
502 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
503
504 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
505 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
506
507 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
508
509 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
510 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
511 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
512
513 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
514 // f32 and f64 use SSE.
515 // Set up the FP register classes.
516 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
517 : &X86::FR32RegClass);
518 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
519 : &X86::FR64RegClass);
520
521 for (auto VT : { MVT::f32, MVT::f64 }) {
522 // Use ANDPD to simulate FABS.
523 setOperationAction(ISD::FABS, VT, Custom);
524
525 // Use XORP to simulate FNEG.
526 setOperationAction(ISD::FNEG, VT, Custom);
527
528 // Use ANDPD and ORPD to simulate FCOPYSIGN.
529 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
530
531 // We don't support sin/cos/fmod
532 setOperationAction(ISD::FSIN , VT, Expand);
533 setOperationAction(ISD::FCOS , VT, Expand);
534 setOperationAction(ISD::FSINCOS, VT, Expand);
535 }
536
537 // Lower this to MOVMSK plus an AND.
538 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
539 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
540
541 // Expand FP immediates into loads from the stack, except for the special
542 // cases we handle.
543 addLegalFPImmediate(APFloat(+0.0)); // xorpd
544 addLegalFPImmediate(APFloat(+0.0f)); // xorps
545 } else if (UseX87 && X86ScalarSSEf32) {
546 // Use SSE for f32, x87 for f64.
547 // Set up the FP register classes.
548 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
549 : &X86::FR32RegClass);
550 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
551
552 // Use ANDPS to simulate FABS.
553 setOperationAction(ISD::FABS , MVT::f32, Custom);
554
555 // Use XORP to simulate FNEG.
556 setOperationAction(ISD::FNEG , MVT::f32, Custom);
557
558 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
559
560 // Use ANDPS and ORPS to simulate FCOPYSIGN.
561 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
562 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
563
564 // We don't support sin/cos/fmod
565 setOperationAction(ISD::FSIN , MVT::f32, Expand);
566 setOperationAction(ISD::FCOS , MVT::f32, Expand);
567 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
568
569 // Special cases we handle for FP constants.
570 addLegalFPImmediate(APFloat(+0.0f)); // xorps
571 addLegalFPImmediate(APFloat(+0.0)); // FLD0
572 addLegalFPImmediate(APFloat(+1.0)); // FLD1
573 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
574 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
575
576 if (!TM.Options.UnsafeFPMath) {
577 setOperationAction(ISD::FSIN , MVT::f64, Expand);
578 setOperationAction(ISD::FCOS , MVT::f64, Expand);
579 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
580 }
581 } else if (UseX87) {
582 // f32 and f64 in x87.
583 // Set up the FP register classes.
584 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
585 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
586
587 for (auto VT : { MVT::f32, MVT::f64 }) {
588 setOperationAction(ISD::UNDEF, VT, Expand);
589 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
590
591 if (!TM.Options.UnsafeFPMath) {
592 setOperationAction(ISD::FSIN , VT, Expand);
593 setOperationAction(ISD::FCOS , VT, Expand);
594 setOperationAction(ISD::FSINCOS, VT, Expand);
595 }
596 }
597 addLegalFPImmediate(APFloat(+0.0)); // FLD0
598 addLegalFPImmediate(APFloat(+1.0)); // FLD1
599 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
600 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
601 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
602 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
603 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
604 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
605 }
606
607 // We don't support FMA.
608 setOperationAction(ISD::FMA, MVT::f64, Expand);
609 setOperationAction(ISD::FMA, MVT::f32, Expand);
610
611 // Long double always uses X87, except f128 in MMX.
612 if (UseX87) {
613 if (Subtarget.is64Bit() && Subtarget.hasMMX()) {
614 addRegisterClass(MVT::f128, &X86::FR128RegClass);
615 ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat);
616 setOperationAction(ISD::FABS , MVT::f128, Custom);
617 setOperationAction(ISD::FNEG , MVT::f128, Custom);
618 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
619 }
620
621 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
622 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
623 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
624 {
625 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
626 addLegalFPImmediate(TmpFlt); // FLD0
627 TmpFlt.changeSign();
628 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
629
630 bool ignored;
631 APFloat TmpFlt2(+1.0);
632 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
633 &ignored);
634 addLegalFPImmediate(TmpFlt2); // FLD1
635 TmpFlt2.changeSign();
636 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
637 }
638
639 if (!TM.Options.UnsafeFPMath) {
640 setOperationAction(ISD::FSIN , MVT::f80, Expand);
641 setOperationAction(ISD::FCOS , MVT::f80, Expand);
642 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
643 }
644
645 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
646 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
647 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
648 setOperationAction(ISD::FRINT, MVT::f80, Expand);
649 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
650 setOperationAction(ISD::FMA, MVT::f80, Expand);
651 }
652
653 // Always use a library call for pow.
654 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
655 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
656 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
657
658 setOperationAction(ISD::FLOG, MVT::f80, Expand);
659 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
660 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
661 setOperationAction(ISD::FEXP, MVT::f80, Expand);
662 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
663 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
664 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
665
666 // Some FP actions are always expanded for vector types.
667 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
668 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
669 setOperationAction(ISD::FSIN, VT, Expand);
670 setOperationAction(ISD::FSINCOS, VT, Expand);
671 setOperationAction(ISD::FCOS, VT, Expand);
672 setOperationAction(ISD::FREM, VT, Expand);
673 setOperationAction(ISD::FPOWI, VT, Expand);
674 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
675 setOperationAction(ISD::FPOW, VT, Expand);
676 setOperationAction(ISD::FLOG, VT, Expand);
677 setOperationAction(ISD::FLOG2, VT, Expand);
678 setOperationAction(ISD::FLOG10, VT, Expand);
679 setOperationAction(ISD::FEXP, VT, Expand);
680 setOperationAction(ISD::FEXP2, VT, Expand);
681 }
682
683 // First set operation action for all vector types to either promote
684 // (for widening) or expand (for scalarization). Then we will selectively
685 // turn on ones that can be effectively codegen'd.
686 for (MVT VT : MVT::vector_valuetypes()) {
687 setOperationAction(ISD::SDIV, VT, Expand);
688 setOperationAction(ISD::UDIV, VT, Expand);
689 setOperationAction(ISD::SREM, VT, Expand);
690 setOperationAction(ISD::UREM, VT, Expand);
691 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
692 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
693 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
694 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
695 setOperationAction(ISD::FMA, VT, Expand);
696 setOperationAction(ISD::FFLOOR, VT, Expand);
697 setOperationAction(ISD::FCEIL, VT, Expand);
698 setOperationAction(ISD::FTRUNC, VT, Expand);
699 setOperationAction(ISD::FRINT, VT, Expand);
700 setOperationAction(ISD::FNEARBYINT, VT, Expand);
701 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
702 setOperationAction(ISD::MULHS, VT, Expand);
703 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
704 setOperationAction(ISD::MULHU, VT, Expand);
705 setOperationAction(ISD::SDIVREM, VT, Expand);
706 setOperationAction(ISD::UDIVREM, VT, Expand);
707 setOperationAction(ISD::CTPOP, VT, Expand);
708 setOperationAction(ISD::CTTZ, VT, Expand);
709 setOperationAction(ISD::CTLZ, VT, Expand);
710 setOperationAction(ISD::ROTL, VT, Expand);
711 setOperationAction(ISD::ROTR, VT, Expand);
712 setOperationAction(ISD::BSWAP, VT, Expand);
713 setOperationAction(ISD::SETCC, VT, Expand);
714 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
715 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
716 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
717 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
718 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
719 setOperationAction(ISD::TRUNCATE, VT, Expand);
720 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
721 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
722 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
723 setOperationAction(ISD::SELECT_CC, VT, Expand);
724 for (MVT InnerVT : MVT::vector_valuetypes()) {
725 setTruncStoreAction(InnerVT, VT, Expand);
726
727 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
728 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
729
730 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
731 // types, we have to deal with them whether we ask for Expansion or not.
732 // Setting Expand causes its own optimisation problems though, so leave
733 // them legal.
734 if (VT.getVectorElementType() == MVT::i1)
735 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
736
737 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
738 // split/scalarized right now.
739 if (VT.getVectorElementType() == MVT::f16)
740 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
741 }
742 }
743
744 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
745 // with -msoft-float, disable use of MMX as well.
746 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
747 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
748 // No operations on x86mmx supported, everything uses intrinsics.
749 }
750
751 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
752 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
753 : &X86::VR128RegClass);
754
755 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
756 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
757 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
758 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
759 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
760 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
761 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
762 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
763 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
764 }
765
766 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
767 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
768 : &X86::VR128RegClass);
769
770 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
771 // registers cannot be used even for integer operations.
772 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
773 : &X86::VR128RegClass);
774 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
775 : &X86::VR128RegClass);
776 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
777 : &X86::VR128RegClass);
778 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
779 : &X86::VR128RegClass);
780
781 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
782 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
783 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
784 setOperationAction(ISD::UMUL_LOHI, MVT::v4i32, Custom);
785 setOperationAction(ISD::SMUL_LOHI, MVT::v4i32, Custom);
786 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
787 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
788 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
789 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
790 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
791 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
792 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
793 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
794
795 setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
796 setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
797 setOperationAction(ISD::SMIN, MVT::v8i16, Legal);
798 setOperationAction(ISD::UMIN, MVT::v16i8, Legal);
799
800 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
801 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
802 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
803
804 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
805 setOperationAction(ISD::SETCC, VT, Custom);
806 setOperationAction(ISD::CTPOP, VT, Custom);
807 setOperationAction(ISD::CTTZ, VT, Custom);
808 }
809
810 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
811 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
812 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
813 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
814 setOperationAction(ISD::VSELECT, VT, Custom);
815 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
816 }
817
818 // We support custom legalizing of sext and anyext loads for specific
819 // memory vector types which we can load as a scalar (or sequence of
820 // scalars) and extend in-register to a legal 128-bit vector type. For sext
821 // loads these must work with a single scalar load.
822 for (MVT VT : MVT::integer_vector_valuetypes()) {
823 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i8, Custom);
824 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v4i16, Custom);
825 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v8i8, Custom);
826 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i8, Custom);
827 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i16, Custom);
828 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2i32, Custom);
829 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i8, Custom);
830 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4i16, Custom);
831 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8i8, Custom);
832 }
833
834 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
835 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
836 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
837 setOperationAction(ISD::VSELECT, VT, Custom);
838
839 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
840 continue;
841
842 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
843 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
844 }
845
846 // Promote v16i8, v8i16, v4i32 load, select, and, or, xor to v2i64.
847 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
848 setOperationPromotedToType(ISD::AND, VT, MVT::v2i64);
849 setOperationPromotedToType(ISD::OR, VT, MVT::v2i64);
850 setOperationPromotedToType(ISD::XOR, VT, MVT::v2i64);
851 setOperationPromotedToType(ISD::LOAD, VT, MVT::v2i64);
852 setOperationPromotedToType(ISD::SELECT, VT, MVT::v2i64);
853 }
854
855 // Custom lower v2i64 and v2f64 selects.
856 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
857 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
858
859 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
860 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
861
862 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
863 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
864
865 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
866 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
867 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
868
869 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
870 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
871
872 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
873 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
874
875 for (MVT VT : MVT::fp_vector_valuetypes())
876 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v2f32, Legal);
877
878 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
879 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
880 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
881
882 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
883 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
884 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
885
886 // In the customized shift lowering, the legal v4i32/v2i64 cases
887 // in AVX2 will be recognized.
888 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
889 setOperationAction(ISD::SRL, VT, Custom);
890 setOperationAction(ISD::SHL, VT, Custom);
891 setOperationAction(ISD::SRA, VT, Custom);
892 }
893 }
894
895 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
896 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
897 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
898 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
899 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
900 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
901 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
902 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
903 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
904 }
905
906 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
907 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
908 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
909 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
910 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
911 setOperationAction(ISD::FRINT, RoundedTy, Legal);
912 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
913 }
914
915 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
916 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
917 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
918 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
919 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
920 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
921 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
922 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
923
924 // FIXME: Do we need to handle scalar-to-vector here?
925 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
926
927 // We directly match byte blends in the backend as they match the VSELECT
928 // condition form.
929 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
930
931 // SSE41 brings specific instructions for doing vector sign extend even in
932 // cases where we don't have SRA.
933 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
934 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
935 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
936 }
937
938 for (MVT VT : MVT::integer_vector_valuetypes()) {
939 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i8, Custom);
940 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i16, Custom);
941 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::v2i32, Custom);
942 }
943
944 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
945 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
946 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
947 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
948 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
949 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
950 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
951 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
952 }
953
954 // i8 vectors are custom because the source register and source
955 // memory operand types are not the same width.
956 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
957 }
958
959 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
960 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
961 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
962 setOperationAction(ISD::ROTL, VT, Custom);
963
964 // XOP can efficiently perform BITREVERSE with VPPERM.
965 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
966 setOperationAction(ISD::BITREVERSE, VT, Custom);
967
968 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
969 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
970 setOperationAction(ISD::BITREVERSE, VT, Custom);
971 }
972
973 if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
974 bool HasInt256 = Subtarget.hasInt256();
975
976 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
977 : &X86::VR256RegClass);
978 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
979 : &X86::VR256RegClass);
980 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
981 : &X86::VR256RegClass);
982 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
983 : &X86::VR256RegClass);
984 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
985 : &X86::VR256RegClass);
986 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
987 : &X86::VR256RegClass);
988
989 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
990 setOperationAction(ISD::FFLOOR, VT, Legal);
991 setOperationAction(ISD::FCEIL, VT, Legal);
992 setOperationAction(ISD::FTRUNC, VT, Legal);
993 setOperationAction(ISD::FRINT, VT, Legal);
994 setOperationAction(ISD::FNEARBYINT, VT, Legal);
995 setOperationAction(ISD::FNEG, VT, Custom);
996 setOperationAction(ISD::FABS, VT, Custom);
997 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
998 }
999
1000 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1001 // even though v8i16 is a legal type.
1002 setOperationAction(ISD::FP_TO_SINT, MVT::v8i16, Promote);
1003 setOperationAction(ISD::FP_TO_UINT, MVT::v8i16, Promote);
1004 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1005
1006 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
1007 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1008 setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
1009
1010 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
1011 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
1012
1013 for (MVT VT : MVT::fp_vector_valuetypes())
1014 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v4f32, Legal);
1015
1016 // In the customized shift lowering, the legal v8i32/v4i64 cases
1017 // in AVX2 will be recognized.
1018 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1019 setOperationAction(ISD::SRL, VT, Custom);
1020 setOperationAction(ISD::SHL, VT, Custom);
1021 setOperationAction(ISD::SRA, VT, Custom);
1022 }
1023
1024 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1025 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1026 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1027
1028 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1029 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1030 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1031 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1032 }
1033
1034 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1035 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1036 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1037 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1038
1039 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1040 setOperationAction(ISD::SETCC, VT, Custom);
1041 setOperationAction(ISD::CTPOP, VT, Custom);
1042 setOperationAction(ISD::CTTZ, VT, Custom);
1043 setOperationAction(ISD::CTLZ, VT, Custom);
1044 }
1045
1046 if (Subtarget.hasAnyFMA()) {
1047 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1048 MVT::v2f64, MVT::v4f64 })
1049 setOperationAction(ISD::FMA, VT, Legal);
1050 }
1051
1052 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1053 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1054 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1055 }
1056
1057 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1058 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1059 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1060 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1061
1062 setOperationAction(ISD::UMUL_LOHI, MVT::v8i32, Custom);
1063 setOperationAction(ISD::SMUL_LOHI, MVT::v8i32, Custom);
1064
1065 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1066 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1067 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1068 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1069
1070 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1071 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1072 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1073 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1074 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1075 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1076 }
1077
1078 if (HasInt256) {
1079 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i64, Custom);
1080 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i32, Custom);
1081 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i16, Custom);
1082
1083 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1084 // when we have a 256bit-wide blend with immediate.
1085 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1086
1087 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1088 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1089 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1090 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1091 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1092 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1093 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1094 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1095 }
1096 }
1097
1098 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1099 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1100 setOperationAction(ISD::MLOAD, VT, Legal);
1101 setOperationAction(ISD::MSTORE, VT, Legal);
1102 }
1103
1104 // Extract subvector is special because the value type
1105 // (result) is 128-bit but the source is 256-bit wide.
1106 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1107 MVT::v4f32, MVT::v2f64 }) {
1108 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1109 }
1110
1111 // Custom lower several nodes for 256-bit types.
1112 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1113 MVT::v8f32, MVT::v4f64 }) {
1114 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1115 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1116 setOperationAction(ISD::VSELECT, VT, Custom);
1117 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1118 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1119 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1120 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1121 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1122 }
1123
1124 if (HasInt256)
1125 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1126
1127 // Promote v32i8, v16i16, v8i32 select, and, or, xor to v4i64.
1128 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1129 setOperationPromotedToType(ISD::AND, VT, MVT::v4i64);
1130 setOperationPromotedToType(ISD::OR, VT, MVT::v4i64);
1131 setOperationPromotedToType(ISD::XOR, VT, MVT::v4i64);
1132 setOperationPromotedToType(ISD::LOAD, VT, MVT::v4i64);
1133 setOperationPromotedToType(ISD::SELECT, VT, MVT::v4i64);
1134 }
1135 }
1136
1137 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1138 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1139 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1140 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1141 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1142
1143 addRegisterClass(MVT::i1, &X86::VK1RegClass);
1144 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1145 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1146
1147 for (MVT VT : MVT::fp_vector_valuetypes())
1148 setLoadExtAction(ISD::EXTLOAD, VT, MVT::v8f32, Legal);
1149
1150 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
1151 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1152 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1153 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1154 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1155 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1156 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1157 }
1158 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
1159 setOperationAction(ISD::SETCC, MVT::i1, Custom);
1160 setOperationAction(ISD::SETCCE, MVT::i1, Custom);
1161 setOperationAction(ISD::SELECT_CC, MVT::i1, Expand);
1162 setOperationAction(ISD::XOR, MVT::i1, Legal);
1163 setOperationAction(ISD::OR, MVT::i1, Legal);
1164 setOperationAction(ISD::AND, MVT::i1, Legal);
1165 setOperationAction(ISD::SUB, MVT::i1, Custom);
1166 setOperationAction(ISD::ADD, MVT::i1, Custom);
1167 setOperationAction(ISD::MUL, MVT::i1, Custom);
1168
1169 for (MVT VT : {MVT::v2i64, MVT::v4i32, MVT::v8i32, MVT::v4i64, MVT::v8i16,
1170 MVT::v16i8, MVT::v16i16, MVT::v32i8, MVT::v16i32,
1171 MVT::v8i64, MVT::v32i16, MVT::v64i8}) {
1172 MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
1173 setLoadExtAction(ISD::SEXTLOAD, VT, MaskVT, Custom);
1174 setLoadExtAction(ISD::ZEXTLOAD, VT, MaskVT, Custom);
1175 setLoadExtAction(ISD::EXTLOAD, VT, MaskVT, Custom);
1176 setTruncStoreAction(VT, MaskVT, Custom);
1177 }
1178
1179 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1180 setOperationAction(ISD::FNEG, VT, Custom);
1181 setOperationAction(ISD::FABS, VT, Custom);
1182 setOperationAction(ISD::FMA, VT, Legal);
1183 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1184 }
1185
1186 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1187 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1188 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1189 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1190 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1191 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1192 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1193 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1194 setOperationAction(ISD::SINT_TO_FP, MVT::v16i8, Promote);
1195 setOperationAction(ISD::SINT_TO_FP, MVT::v16i16, Promote);
1196 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1197 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1198 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
1199 setOperationAction(ISD::UINT_TO_FP, MVT::v16i8, Custom);
1200 setOperationAction(ISD::UINT_TO_FP, MVT::v16i16, Custom);
1201 setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
1202 setOperationAction(ISD::UINT_TO_FP, MVT::v16i1, Custom);
1203 setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
1204 setOperationAction(ISD::UINT_TO_FP, MVT::v8i1, Custom);
1205 setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
1206 setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);
1207 setOperationAction(ISD::SINT_TO_FP, MVT::v2i1, Custom);
1208 setOperationAction(ISD::UINT_TO_FP, MVT::v2i1, Custom);
1209 setOperationAction(ISD::FP_ROUND, MVT::v8f32, Legal);
1210 setOperationAction(ISD::FP_EXTEND, MVT::v8f32, Legal);
1211
1212 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1213 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1214 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1215 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1216 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1217 if (Subtarget.hasVLX()){
1218 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1219 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1220 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1221 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1222 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1223
1224 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1225 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1226 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1227 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1228 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1229 } else {
1230 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1231 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1232 setOperationAction(ISD::MLOAD, VT, Custom);
1233 setOperationAction(ISD::MSTORE, VT, Custom);
1234 }
1235 }
1236 setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
1237 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1238 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Custom);
1239
1240 if (Subtarget.hasDQI()) {
1241 for (auto VT : { MVT::v2i64, MVT::v4i64, MVT::v8i64 }) {
1242 setOperationAction(ISD::SINT_TO_FP, VT, Legal);
1243 setOperationAction(ISD::UINT_TO_FP, VT, Legal);
1244 setOperationAction(ISD::FP_TO_SINT, VT, Legal);
1245 setOperationAction(ISD::FP_TO_UINT, VT, Legal);
1246 }
1247 if (Subtarget.hasVLX()) {
1248 // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
1249 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1250 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1251 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1252 }
1253 }
1254 if (Subtarget.hasVLX()) {
1255 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1256 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Legal);
1257 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1258 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
1259 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1260 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1261 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
1262 setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
1263 setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Custom);
1264 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i32, Custom);
1265 setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Custom);
1266
1267 // FIXME: These instructions are also available on SSE/AVX2; add the relevant patterns.
1268 setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i8, Legal);
1269 setLoadExtAction(ISD::EXTLOAD, MVT::v8i32, MVT::v8i16, Legal);
1270 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Legal);
1271 setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i16, Legal);
1272 setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i8, Legal);
1273 setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i16, Legal);
1274 setLoadExtAction(ISD::EXTLOAD, MVT::v4i64, MVT::v4i32, Legal);
1275 setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i8, Legal);
1276 setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i16, Legal);
1277 setLoadExtAction(ISD::EXTLOAD, MVT::v2i64, MVT::v2i32, Legal);
1278 }
1279
1280 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Custom);
1281 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1282 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1283 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1284 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1285 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1286 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1287 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i8, Custom);
1288 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i16, Custom);
1289 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i16, Custom);
1290
1291 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1292 setOperationAction(ISD::FFLOOR, VT, Legal);
1293 setOperationAction(ISD::FCEIL, VT, Legal);
1294 setOperationAction(ISD::FTRUNC, VT, Legal);
1295 setOperationAction(ISD::FRINT, VT, Legal);
1296 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1297 }
1298
1299 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
1300 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
1301
1302 // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
1303 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1304 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
1305
1306 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
1307 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
1308 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
1309 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i32, Custom);
1310 setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i1, Custom);
1311
1312 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1313
1314 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom);
1315 setOperationAction(ISD::SELECT, MVT::v8f64, Custom);
1316 setOperationAction(ISD::SELECT, MVT::v8i64, Custom);
1317 setOperationAction(ISD::SELECT, MVT::v16f32, Custom);
1318
1319 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1320
1321 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1322 setOperationAction(ISD::ABS, MVT::v4i64, Legal);
1323 setOperationAction(ISD::ABS, MVT::v2i64, Legal);
1324
1325 for (auto VT : { MVT::v8i1, MVT::v16i1 }) {
1326 setOperationAction(ISD::ADD, VT, Custom);
1327 setOperationAction(ISD::SUB, VT, Custom);
1328 setOperationAction(ISD::MUL, VT, Custom);
1329 setOperationAction(ISD::SETCC, VT, Custom);
1330 setOperationAction(ISD::SELECT, VT, Custom);
1331 setOperationAction(ISD::TRUNCATE, VT, Custom);
1332
1333 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1334 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1335 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1336 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1337 setOperationAction(ISD::VSELECT, VT, Expand);
1338 }
1339
1340 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1341 setOperationAction(ISD::SMAX, VT, Legal);
1342 setOperationAction(ISD::UMAX, VT, Legal);
1343 setOperationAction(ISD::SMIN, VT, Legal);
1344 setOperationAction(ISD::UMIN, VT, Legal);
1345 setOperationAction(ISD::ABS, VT, Legal);
1346 setOperationAction(ISD::SRL, VT, Custom);
1347 setOperationAction(ISD::SHL, VT, Custom);
1348 setOperationAction(ISD::SRA, VT, Custom);
1349 setOperationAction(ISD::CTPOP, VT, Custom);
1350 setOperationAction(ISD::CTTZ, VT, Custom);
1351 }
1352
1353 // Need to promote to 64-bit even though we have 32-bit masked instructions
1354 // because the IR optimizers rearrange bitcasts around logic ops leaving
1355 // too many variations to handle if we don't promote them.
1356 setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
1357 setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
1358 setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
1359
1360 if (Subtarget.hasCDI()) {
1361 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1362 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v16i32, MVT::v2i64,
1363 MVT::v4i64, MVT::v8i64}) {
1364 setOperationAction(ISD::CTLZ, VT, Legal);
1365 setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
1366 }
1367 } // Subtarget.hasCDI()
1368
1369 if (Subtarget.hasDQI()) {
1370 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1371 setOperationAction(ISD::MUL, MVT::v2i64, Legal);
1372 setOperationAction(ISD::MUL, MVT::v4i64, Legal);
1373 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1374 }
1375
1376 // Custom lower several nodes.
1377 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1378 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1379 setOperationAction(ISD::MGATHER, VT, Custom);
1380 setOperationAction(ISD::MSCATTER, VT, Custom);
1381 }
1382 // Extract subvector is special because the value type
1383 // (result) is 256-bit but the source is 512-bit wide.
1384 // 128-bit was made Custom under AVX1.
1385 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1386 MVT::v8f32, MVT::v4f64 })
1387 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1388 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1,
1389 MVT::v16i1, MVT::v32i1, MVT::v64i1 })
1390 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1391
1392 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1393 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1394 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1395 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1396 setOperationAction(ISD::VSELECT, VT, Custom);
1397 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1398 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1399 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1400 setOperationAction(ISD::MLOAD, VT, Legal);
1401 setOperationAction(ISD::MSTORE, VT, Legal);
1402 setOperationAction(ISD::MGATHER, VT, Legal);
1403 setOperationAction(ISD::MSCATTER, VT, Custom);
1404 }
1405 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
1406 setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
1407 setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
1408 }
1409 }// has AVX-512
1410
1411 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1412 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1413 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1414
1415 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1416 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1417
1418 setOperationAction(ISD::ADD, MVT::v32i1, Custom);
1419 setOperationAction(ISD::ADD, MVT::v64i1, Custom);
1420 setOperationAction(ISD::SUB, MVT::v32i1, Custom);
1421 setOperationAction(ISD::SUB, MVT::v64i1, Custom);
1422 setOperationAction(ISD::MUL, MVT::v32i1, Custom);
1423 setOperationAction(ISD::MUL, MVT::v64i1, Custom);
1424
1425 setOperationAction(ISD::SETCC, MVT::v32i1, Custom);
1426 setOperationAction(ISD::SETCC, MVT::v64i1, Custom);
1427 setOperationAction(ISD::MUL, MVT::v32i16, Legal);
1428 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1429 setOperationAction(ISD::MULHS, MVT::v32i16, Legal);
1430 setOperationAction(ISD::MULHU, MVT::v32i16, Legal);
1431 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i1, Custom);
1432 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i1, Custom);
1433 setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i16, Custom);
1434 setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i8, Custom);
1435 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i1, Custom);
1436 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i1, Custom);
1437 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v32i16, Legal);
1438 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Legal);
1439 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
1440 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
1441 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
1442 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
1443 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
1444 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
1445 setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
1446 setOperationAction(ISD::SELECT, MVT::v64i1, Custom);
1447 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1448 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1449 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1450 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1451 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1452 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
1453 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
1454 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1455 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1456 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i1, Custom);
1457 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i1, Custom);
1458 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v32i16, Custom);
1459 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v64i8, Custom);
1460 setOperationAction(ISD::TRUNCATE, MVT::v32i1, Custom);
1461 setOperationAction(ISD::TRUNCATE, MVT::v64i1, Custom);
1462 setOperationAction(ISD::TRUNCATE, MVT::v32i8, Custom);
1463 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i1, Custom);
1464 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i1, Custom);
1465 setOperationAction(ISD::BUILD_VECTOR, MVT::v32i1, Custom);
1466 setOperationAction(ISD::BUILD_VECTOR, MVT::v64i1, Custom);
1467 setOperationAction(ISD::VSELECT, MVT::v32i1, Expand);
1468 setOperationAction(ISD::VSELECT, MVT::v64i1, Expand);
1469 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1470
1471 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
1472
1473 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1474 if (Subtarget.hasVLX()) {
1475 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1476 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1477 }
1478
1479 LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
1480 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1481 setOperationAction(ISD::MLOAD, VT, Action);
1482 setOperationAction(ISD::MSTORE, VT, Action);
1483 }
1484
1485 if (Subtarget.hasCDI()) {
1486 setOperationAction(ISD::CTLZ, MVT::v32i16, Custom);
1487 setOperationAction(ISD::CTLZ, MVT::v64i8, Custom);
1488 }
1489
1490 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1491 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1492 setOperationAction(ISD::VSELECT, VT, Custom);
1493 setOperationAction(ISD::ABS, VT, Legal);
1494 setOperationAction(ISD::SRL, VT, Custom);
1495 setOperationAction(ISD::SHL, VT, Custom);
1496 setOperationAction(ISD::SRA, VT, Custom);
1497 setOperationAction(ISD::MLOAD, VT, Legal);
1498 setOperationAction(ISD::MSTORE, VT, Legal);
1499 setOperationAction(ISD::CTPOP, VT, Custom);
1500 setOperationAction(ISD::CTTZ, VT, Custom);
1501 setOperationAction(ISD::SMAX, VT, Legal);
1502 setOperationAction(ISD::UMAX, VT, Legal);
1503 setOperationAction(ISD::SMIN, VT, Legal);
1504 setOperationAction(ISD::UMIN, VT, Legal);
1505
1506 setOperationPromotedToType(ISD::AND, VT, MVT::v8i64);
1507 setOperationPromotedToType(ISD::OR, VT, MVT::v8i64);
1508 setOperationPromotedToType(ISD::XOR, VT, MVT::v8i64);
1509 }
1510
1511 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD, ISD::EXTLOAD}) {
1512 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1513 if (Subtarget.hasVLX()) {
1514 // FIXME. This commands are available on SSE/AVX2, add relevant patterns.
1515 setLoadExtAction(ExtType, MVT::v16i16, MVT::v16i8, Legal);
1516 setLoadExtAction(ExtType, MVT::v8i16, MVT::v8i8, Legal);
1517 }
1518 }
1519 }
1520
1521 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1522 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1523 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1524
1525 for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
1526 setOperationAction(ISD::ADD, VT, Custom);
1527 setOperationAction(ISD::SUB, VT, Custom);
1528 setOperationAction(ISD::MUL, VT, Custom);
1529 setOperationAction(ISD::VSELECT, VT, Expand);
1530
1531 setOperationAction(ISD::TRUNCATE, VT, Custom);
1532 setOperationAction(ISD::SETCC, VT, Custom);
1533 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1534 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1535 setOperationAction(ISD::SELECT, VT, Custom);
1536 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1537 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1538 }
1539
1540 setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
1541 setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
1542 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
1543 setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
1544
1545 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1546 setOperationAction(ISD::SMAX, VT, Legal);
1547 setOperationAction(ISD::UMAX, VT, Legal);
1548 setOperationAction(ISD::SMIN, VT, Legal);
1549 setOperationAction(ISD::UMIN, VT, Legal);
1550 }
1551 }
1552
1553 // We want to custom lower some of our intrinsics.
1554 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1555 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1556 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1557 if (!Subtarget.is64Bit()) {
1558 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1559 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
1560 }
1561
1562 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1563 // handle type legalization for these operations here.
1564 //
1565 // FIXME: We really should do custom legalization for addition and
1566 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1567 // than generic legalization for 64-bit multiplication-with-overflow, though.
1568 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1569 if (VT == MVT::i64 && !Subtarget.is64Bit())
1570 continue;
1571 // Add/Sub/Mul with overflow operations are custom lowered.
1572 setOperationAction(ISD::SADDO, VT, Custom);
1573 setOperationAction(ISD::UADDO, VT, Custom);
1574 setOperationAction(ISD::SSUBO, VT, Custom);
1575 setOperationAction(ISD::USUBO, VT, Custom);
1576 setOperationAction(ISD::SMULO, VT, Custom);
1577 setOperationAction(ISD::UMULO, VT, Custom);
1578
1579 // Support carry in as value rather than glue.
1580 setOperationAction(ISD::ADDCARRY, VT, Custom);
1581 setOperationAction(ISD::SUBCARRY, VT, Custom);
1582 }
1583
1584 if (!Subtarget.is64Bit()) {
1585 // These libcalls are not available in 32-bit.
1586 setLibcallName(RTLIB::SHL_I128, nullptr);
1587 setLibcallName(RTLIB::SRL_I128, nullptr);
1588 setLibcallName(RTLIB::SRA_I128, nullptr);
1589 }
1590
1591 // Combine sin / cos into one node or libcall if possible.
1592 if (Subtarget.hasSinCos()) {
1593 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1594 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1595 if (Subtarget.isTargetDarwin()) {
1596 // For MacOSX, we don't want the normal expansion of a libcall to sincos.
1597 // We want to issue a libcall to __sincos_stret to avoid memory traffic.
1598 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1599 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1600 }
1601 }
1602
1603 if (Subtarget.isTargetWin64()) {
1604 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1605 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1606 setOperationAction(ISD::SREM, MVT::i128, Custom);
1607 setOperationAction(ISD::UREM, MVT::i128, Custom);
1608 setOperationAction(ISD::SDIVREM, MVT::i128, Custom);
1609 setOperationAction(ISD::UDIVREM, MVT::i128, Custom);
1610 }
1611
1612 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1613 // is. We should promote the value to 64-bits to solve this.
1614 // This is what the CRT headers do - `fmodf` is an inline header
1615 // function casting to f64 and calling `fmod`.
1616 if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
1617 Subtarget.isTargetWindowsItanium()))
1618 for (ISD::NodeType Op :
1619 {ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
1620 ISD::FLOG10, ISD::FPOW, ISD::FSIN})
1621 if (isOperationExpand(Op, MVT::f32))
1622 setOperationAction(Op, MVT::f32, Promote);
1623
1624 // We have target-specific dag combine patterns for the following nodes:
1625 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
1626 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
1627 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
1628 setTargetDAGCombine(ISD::BITCAST);
1629 setTargetDAGCombine(ISD::VSELECT);
1630 setTargetDAGCombine(ISD::SELECT);
1631 setTargetDAGCombine(ISD::SHL);
1632 setTargetDAGCombine(ISD::SRA);
1633 setTargetDAGCombine(ISD::SRL);
1634 setTargetDAGCombine(ISD::OR);
1635 setTargetDAGCombine(ISD::AND);
1636 setTargetDAGCombine(ISD::ADD);
1637 setTargetDAGCombine(ISD::FADD);
1638 setTargetDAGCombine(ISD::FSUB);
1639 setTargetDAGCombine(ISD::FNEG);
1640 setTargetDAGCombine(ISD::FMA);
1641 setTargetDAGCombine(ISD::FMINNUM);
1642 setTargetDAGCombine(ISD::FMAXNUM);
1643 setTargetDAGCombine(ISD::SUB);
1644 setTargetDAGCombine(ISD::LOAD);
1645 setTargetDAGCombine(ISD::MLOAD);
1646 setTargetDAGCombine(ISD::STORE);
1647 setTargetDAGCombine(ISD::MSTORE);
1648 setTargetDAGCombine(ISD::TRUNCATE);
1649 setTargetDAGCombine(ISD::ZERO_EXTEND);
1650 setTargetDAGCombine(ISD::ANY_EXTEND);
1651 setTargetDAGCombine(ISD::SIGN_EXTEND);
1652 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
1653 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
1654 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
1655 setTargetDAGCombine(ISD::SINT_TO_FP);
1656 setTargetDAGCombine(ISD::UINT_TO_FP);
1657 setTargetDAGCombine(ISD::SETCC);
1658 setTargetDAGCombine(ISD::MUL);
1659 setTargetDAGCombine(ISD::XOR);
1660 setTargetDAGCombine(ISD::MSCATTER);
1661 setTargetDAGCombine(ISD::MGATHER);
1662
1663 computeRegisterProperties(Subtarget.getRegisterInfo());
1664
1665 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
1666 MaxStoresPerMemsetOptSize = 8;
1667 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
1668 MaxStoresPerMemcpyOptSize = 4;
1669 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
1670 MaxStoresPerMemmoveOptSize = 4;
1671 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
1672 setPrefLoopAlignment(ExperimentalPrefLoopAlignment);
1673
1674 // An out-of-order CPU can speculatively execute past a predictable branch,
1675 // but a conditional move could be stalled by an expensive earlier operation.
1676 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
1677 EnableExtLdPromotion = true;
1678 setPrefFunctionAlignment(4); // 2^4 bytes.
1679
1680 verifyIntrinsicTables();
1681}
1682
1683// This has so far only been implemented for 64-bit MachO.
1684bool X86TargetLowering::useLoadStackGuardNode() const {
1685 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
1686}
1687
1688TargetLoweringBase::LegalizeTypeAction
1689X86TargetLowering::getPreferredVectorAction(EVT VT) const {
1690 if (ExperimentalVectorWideningLegalization &&
1691 VT.getVectorNumElements() != 1 &&
1692 VT.getVectorElementType().getSimpleVT() != MVT::i1)
1693 return TypeWidenVector;
1694
1695 return TargetLoweringBase::getPreferredVectorAction(VT);
1696}
1697
1698EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
1699 LLVMContext& Context,
1700 EVT VT) const {
1701 if (!VT.isVector())
1702 return Subtarget.hasAVX512() ? MVT::i1: MVT::i8;
1703
1704 if (VT.isSimple()) {
1705 MVT VVT = VT.getSimpleVT();
1706 const unsigned NumElts = VVT.getVectorNumElements();
1707 MVT EltVT = VVT.getVectorElementType();
1708 if (VVT.is512BitVector()) {
1709 if (Subtarget.hasAVX512())
1710 if (EltVT == MVT::i32 || EltVT == MVT::i64 ||
1711 EltVT == MVT::f32 || EltVT == MVT::f64)
1712 switch(NumElts) {
1713 case 8: return MVT::v8i1;
1714 case 16: return MVT::v16i1;
1715 }
1716 if (Subtarget.hasBWI())
1717 if (EltVT == MVT::i8 || EltVT == MVT::i16)
1718 switch(NumElts) {
1719 case 32: return MVT::v32i1;
1720 case 64: return MVT::v64i1;
1721 }
1722 }
1723
1724 if (Subtarget.hasBWI() && Subtarget.hasVLX())
1725 return MVT::getVectorVT(MVT::i1, NumElts);
1726
1727 if (!isTypeLegal(VT) && getTypeAction(Context, VT) == TypePromoteInteger) {
1728 EVT LegalVT = getTypeToTransformTo(Context, VT);
1729 EltVT = LegalVT.getVectorElementType().getSimpleVT();
1730 }
1731
1732 if (Subtarget.hasVLX() && EltVT.getSizeInBits() >= 32)
1733 switch(NumElts) {
1734 case 2: return MVT::v2i1;
1735 case 4: return MVT::v4i1;
1736 case 8: return MVT::v8i1;
1737 }
1738 }
1739
1740 return VT.changeVectorElementTypeToInteger();
1741}
1742
1743/// Helper for getByValTypeAlignment to determine
1744/// the desired ByVal argument alignment.
1745static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign) {
1746 if (MaxAlign == 16)
1747 return;
1748 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1749 if (VTy->getBitWidth() == 128)
1750 MaxAlign = 16;
1751 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1752 unsigned EltAlign = 0;
1753 getMaxByValAlign(ATy->getElementType(), EltAlign);
1754 if (EltAlign > MaxAlign)
1755 MaxAlign = EltAlign;
1756 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1757 for (auto *EltTy : STy->elements()) {
1758 unsigned EltAlign = 0;
1759 getMaxByValAlign(EltTy, EltAlign);
1760 if (EltAlign > MaxAlign)
1761 MaxAlign = EltAlign;
1762 if (MaxAlign == 16)
1763 break;
1764 }
1765 }
1766}
1767
1768/// Return the desired alignment for ByVal aggregate
1769/// function arguments in the caller parameter area. For X86, aggregates
1770/// that contain SSE vectors are placed at 16-byte boundaries while the rest
1771/// are at 4-byte boundaries.
1772unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
1773 const DataLayout &DL) const {
1774 if (Subtarget.is64Bit()) {
1775 // Max of 8 and alignment of type.
1776 unsigned TyAlign = DL.getABITypeAlignment(Ty);
1777 if (TyAlign > 8)
1778 return TyAlign;
1779 return 8;
1780 }
1781
1782 unsigned Align = 4;
1783 if (Subtarget.hasSSE1())
1784 getMaxByValAlign(Ty, Align);
1785 return Align;
1786}
1787
1788/// Returns the target specific optimal type for load
1789/// and store operations as a result of memset, memcpy, and memmove
1790/// lowering. If DstAlign is zero that means it's safe to destination
1791/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
1792/// means there isn't a need to check it against alignment requirement,
1793/// probably because the source does not need to be loaded. If 'IsMemset' is
1794/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
1795/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
1796/// source is constant so it does not need to be loaded.
1797/// It returns EVT::Other if the type should be determined using generic
1798/// target-independent logic.
1799EVT
1800X86TargetLowering::getOptimalMemOpType(uint64_t Size,
1801 unsigned DstAlign, unsigned SrcAlign,
1802 bool IsMemset, bool ZeroMemset,
1803 bool MemcpyStrSrc,
1804 MachineFunction &MF) const {
1805 const Function *F = MF.getFunction();
1806 if (!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
1807 if (Size >= 16 &&
1808 (!Subtarget.isUnalignedMem16Slow() ||
1809 ((DstAlign == 0 || DstAlign >= 16) &&
1810 (SrcAlign == 0 || SrcAlign >= 16)))) {
1811 // FIXME: Check if unaligned 32-byte accesses are slow.
1812 if (Size >= 32 && Subtarget.hasAVX()) {
1813 // Although this isn't a well-supported type for AVX1, we'll let
1814 // legalization and shuffle lowering produce the optimal codegen. If we
1815 // choose an optimal type with a vector element larger than a byte,
1816 // getMemsetStores() may create an intermediate splat (using an integer
1817 // multiply) before we splat as a vector.
1818 return MVT::v32i8;
1819 }
1820 if (Subtarget.hasSSE2())
1821 return MVT::v16i8;
1822 // TODO: Can SSE1 handle a byte vector?
1823 if (Subtarget.hasSSE1())
1824 return MVT::v4f32;
1825 } else if ((!IsMemset || ZeroMemset) && !MemcpyStrSrc && Size >= 8 &&
1826 !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
1827 // Do not use f64 to lower memcpy if source is string constant. It's
1828 // better to use i32 to avoid the loads.
1829 // Also, do not use f64 to lower memset unless this is a memset of zeros.
1830 // The gymnastics of splatting a byte value into an XMM register and then
1831 // only using 8-byte stores (because this is a CPU with slow unaligned
1832 // 16-byte accesses) makes that a loser.
1833 return MVT::f64;
1834 }
1835 }
1836 // This is a compromise. If we reach here, unaligned accesses may be slow on
1837 // this target. However, creating smaller, aligned accesses could be even
1838 // slower and would certainly be a lot more code.
1839 if (Subtarget.is64Bit() && Size >= 8)
1840 return MVT::i64;
1841 return MVT::i32;
1842}
1843
1844bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
1845 if (VT == MVT::f32)
1846 return X86ScalarSSEf32;
1847 else if (VT == MVT::f64)
1848 return X86ScalarSSEf64;
1849 return true;
1850}
1851
1852bool
1853X86TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1854 unsigned,
1855 unsigned,
1856 bool *Fast) const {
1857 if (Fast) {
1858 switch (VT.getSizeInBits()) {
1859 default:
1860 // 8-byte and under are always assumed to be fast.
1861 *Fast = true;
1862 break;
1863 case 128:
1864 *Fast = !Subtarget.isUnalignedMem16Slow();
1865 break;
1866 case 256:
1867 *Fast = !Subtarget.isUnalignedMem32Slow();
1868 break;
1869 // TODO: What about AVX-512 (512-bit) accesses?
1870 }
1871 }
1872 // Misaligned accesses of any size are always allowed.
1873 return true;
1874}
1875
1876/// Return the entry encoding for a jump table in the
1877/// current function. The returned value is a member of the
1878/// MachineJumpTableInfo::JTEntryKind enum.
1879unsigned X86TargetLowering::getJumpTableEncoding() const {
1880 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
1881 // symbol.
1882 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
1883 return MachineJumpTableInfo::EK_Custom32;
1884
1885 // Otherwise, use the normal jump table encoding heuristics.
1886 return TargetLowering::getJumpTableEncoding();
1887}
1888
1889bool X86TargetLowering::useSoftFloat() const {
1890 return Subtarget.useSoftFloat();
1891}
1892
1893void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
1894 ArgListTy &Args) const {
1895
1896 // Only relabel X86-32 for C / Stdcall CCs.
1897 if (Subtarget.is64Bit())
1898 return;
1899 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
1900 return;
1901 unsigned ParamRegs = 0;
1902 if (auto *M = MF->getFunction()->getParent())
1903 ParamRegs = M->getNumberRegisterParameters();
1904
1905 // Mark the first N int arguments as having reg
1906 for (unsigned Idx = 0; Idx < Args.size(); Idx++) {
1907 Type *T = Args[Idx].Ty;
1908 if (T->isPointerTy() || T->isIntegerTy())
1909 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
1910 unsigned numRegs = 1;
1911 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
1912 numRegs = 2;
1913 if (ParamRegs < numRegs)
1914 return;
1915 ParamRegs -= numRegs;
1916 Args[Idx].IsInReg = true;
1917 }
1918 }
1919}
1920
1921const MCExpr *
1922X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
1923 const MachineBasicBlock *MBB,
1924 unsigned uid,MCContext &Ctx) const{
1925 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((isPositionIndependent() && Subtarget.isPICStyleGOT(
)) ? static_cast<void> (0) : __assert_fail ("isPositionIndependent() && Subtarget.isPICStyleGOT()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 1925, __PRETTY_FUNCTION__))
;
1926 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
1927 // entries.
1928 return MCSymbolRefExpr::create(MBB->getSymbol(),
1929 MCSymbolRefExpr::VK_GOTOFF, Ctx);
1930}
1931
1932/// Returns relocation base for the given PIC jumptable.
1933SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
1934 SelectionDAG &DAG) const {
1935 if (!Subtarget.is64Bit())
1936 // This doesn't have SDLoc associated with it, but is not really the
1937 // same as a Register.
1938 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
1939 getPointerTy(DAG.getDataLayout()));
1940 return Table;
1941}
1942
1943/// This returns the relocation base for the given PIC jumptable,
1944/// the same as getPICJumpTableRelocBase, but as an MCExpr.
1945const MCExpr *X86TargetLowering::
1946getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
1947 MCContext &Ctx) const {
1948 // X86-64 uses RIP relative addressing based on the jump table label.
1949 if (Subtarget.isPICStyleRIPRel())
1950 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
1951
1952 // Otherwise, the reference is relative to the PIC base.
1953 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
1954}
1955
1956std::pair<const TargetRegisterClass *, uint8_t>
1957X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1958 MVT VT) const {
1959 const TargetRegisterClass *RRC = nullptr;
1960 uint8_t Cost = 1;
1961 switch (VT.SimpleTy) {
1962 default:
1963 return TargetLowering::findRepresentativeClass(TRI, VT);
1964 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
1965 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
1966 break;
1967 case MVT::x86mmx:
1968 RRC = &X86::VR64RegClass;
1969 break;
1970 case MVT::f32: case MVT::f64:
1971 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1972 case MVT::v4f32: case MVT::v2f64:
1973 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
1974 case MVT::v8f32: case MVT::v4f64:
1975 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
1976 case MVT::v16f32: case MVT::v8f64:
1977 RRC = &X86::VR128XRegClass;
1978 break;
1979 }
1980 return std::make_pair(RRC, Cost);
1981}
1982
1983unsigned X86TargetLowering::getAddressSpace() const {
1984 if (Subtarget.is64Bit())
1985 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
1986 return 256;
1987}
1988
1989static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
1990 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
1991 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
1992}
1993
1994static Constant* SegmentOffset(IRBuilder<> &IRB,
1995 unsigned Offset, unsigned AddressSpace) {
1996 return ConstantExpr::getIntToPtr(
1997 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
1998 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
1999}
2000
2001Value *X86TargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
2002 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2003 // tcbhead_t; use it instead of the usual global variable (see
2004 // sysdeps/{i386,x86_64}/nptl/tls.h)
2005 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2006 if (Subtarget.isTargetFuchsia()) {
2007 // <magenta/tls.h> defines MX_TLS_STACK_GUARD_OFFSET with this value.
2008 return SegmentOffset(IRB, 0x10, getAddressSpace());
2009 } else {
2010 // %fs:0x28, unless we're using a Kernel code model, in which case
2011 // it's %gs:0x28. gs:0x14 on i386.
2012 unsigned Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2013 return SegmentOffset(IRB, Offset, getAddressSpace());
2014 }
2015 }
2016
2017 return TargetLowering::getIRStackGuard(IRB);
2018}
2019
2020void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2021 // MSVC CRT provides functionalities for stack protection.
2022 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
2023 // MSVC CRT has a global variable holding security cookie.
2024 M.getOrInsertGlobal("__security_cookie",
2025 Type::getInt8PtrTy(M.getContext()));
2026
2027 // MSVC CRT has a function to validate security cookie.
2028 auto *SecurityCheckCookie = cast<Function>(
2029 M.getOrInsertFunction("__security_check_cookie",
2030 Type::getVoidTy(M.getContext()),
2031 Type::getInt8PtrTy(M.getContext())));
2032 SecurityCheckCookie->setCallingConv(CallingConv::X86_FastCall);
2033 SecurityCheckCookie->addAttribute(1, Attribute::AttrKind::InReg);
2034 return;
2035 }
2036 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2037 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2038 return;
2039 TargetLowering::insertSSPDeclarations(M);
2040}
2041
2042Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2043 // MSVC CRT has a global variable holding security cookie.
2044 if (Subtarget.getTargetTriple().isOSMSVCRT())
2045 return M.getGlobalVariable("__security_cookie");
2046 return TargetLowering::getSDagStackGuard(M);
2047}
2048
2049Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2050 // MSVC CRT has a function to validate security cookie.
2051 if (Subtarget.getTargetTriple().isOSMSVCRT())
2052 return M.getFunction("__security_check_cookie");
2053 return TargetLowering::getSSPStackGuardCheck(M);
2054}
2055
2056Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
2057 if (Subtarget.getTargetTriple().isOSContiki())
2058 return getDefaultSafeStackPointerLocation(IRB, false);
2059
2060 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2061 // definition of TLS_SLOT_SAFESTACK in
2062 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2063 if (Subtarget.isTargetAndroid()) {
2064 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2065 // %gs:0x24 on i386
2066 unsigned Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2067 return SegmentOffset(IRB, Offset, getAddressSpace());
2068 }
2069
2070 // Fuchsia is similar.
2071 if (Subtarget.isTargetFuchsia()) {
2072 // <magenta/tls.h> defines MX_TLS_UNSAFE_SP_OFFSET with this value.
2073 return SegmentOffset(IRB, 0x18, getAddressSpace());
2074 }
2075
2076 return TargetLowering::getSafeStackPointerLocation(IRB);
2077}
2078
2079bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
2080 unsigned DestAS) const {
2081 assert(SrcAS != DestAS && "Expected different address spaces!")((SrcAS != DestAS && "Expected different address spaces!"
) ? static_cast<void> (0) : __assert_fail ("SrcAS != DestAS && \"Expected different address spaces!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2081, __PRETTY_FUNCTION__))
;
2082
2083 return SrcAS < 256 && DestAS < 256;
2084}
2085
2086//===----------------------------------------------------------------------===//
2087// Return Value Calling Convention Implementation
2088//===----------------------------------------------------------------------===//
2089
2090#include "X86GenCallingConv.inc"
2091
2092bool X86TargetLowering::CanLowerReturn(
2093 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2094 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2095 SmallVector<CCValAssign, 16> RVLocs;
2096 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2097 return CCInfo.CheckReturn(Outs, RetCC_X86);
2098}
2099
2100const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2101 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2102 return ScratchRegs;
2103}
2104
2105/// Lowers masks values (v*i1) to the local register values
2106/// \returns DAG node after lowering to register type
2107static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2108 const SDLoc &Dl, SelectionDAG &DAG) {
2109 EVT ValVT = ValArg.getValueType();
2110
2111 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2112 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2113 // Two stage lowering might be required
2114 // bitcast: v8i1 -> i8 / v16i1 -> i16
2115 // anyextend: i8 -> i32 / i16 -> i32
2116 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2117 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2118 if (ValLoc == MVT::i32)
2119 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2120 return ValToCopy;
2121 } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2122 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2123 // One stage lowering is required
2124 // bitcast: v32i1 -> i32 / v64i1 -> i64
2125 return DAG.getBitcast(ValLoc, ValArg);
2126 } else
2127 return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
2128}
2129
2130/// Breaks v64i1 value into two registers and adds the new node to the DAG
2131static void Passv64i1ArgInRegs(
2132 const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
2133 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
2134 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2135 assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&(((Subtarget.hasBWI() || Subtarget.hasBMI()) && "Expected AVX512BW or AVX512BMI target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI() || Subtarget.hasBMI()) && \"Expected AVX512BW or AVX512BMI target!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2136, __PRETTY_FUNCTION__))
2136 "Expected AVX512BW or AVX512BMI target!")(((Subtarget.hasBWI() || Subtarget.hasBMI()) && "Expected AVX512BW or AVX512BMI target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI() || Subtarget.hasBMI()) && \"Expected AVX512BW or AVX512BMI target!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2136, __PRETTY_FUNCTION__))
;
2137 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2137, __PRETTY_FUNCTION__))
;
2138 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"
) ? static_cast<void> (0) : __assert_fail ("Arg.getValueType() == MVT::i64 && \"Expecting 64 bit value\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2138, __PRETTY_FUNCTION__))
;
2139 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2140, __PRETTY_FUNCTION__))
2140 "The value should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The value should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The value should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2140, __PRETTY_FUNCTION__))
;
2141
2142 // Before splitting the value we cast it to i64
2143 Arg = DAG.getBitcast(MVT::i64, Arg);
2144
2145 // Splitting the value into two i32 types
2146 SDValue Lo, Hi;
2147 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2148 DAG.getConstant(0, Dl, MVT::i32));
2149 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2150 DAG.getConstant(1, Dl, MVT::i32));
2151
2152 // Attach the two i32 types into corresponding registers
2153 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2154 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2155}
2156
2157SDValue
2158X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2159 bool isVarArg,
2160 const SmallVectorImpl<ISD::OutputArg> &Outs,
2161 const SmallVectorImpl<SDValue> &OutVals,
2162 const SDLoc &dl, SelectionDAG &DAG) const {
2163 MachineFunction &MF = DAG.getMachineFunction();
2164 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2165
2166 // In some cases we need to disable registers from the default CSR list.
2167 // For example, when they are used for argument passing.
2168 bool ShouldDisableCalleeSavedRegister =
2169 CallConv == CallingConv::X86_RegCall ||
2170 MF.getFunction()->hasFnAttribute("no_caller_saved_registers");
2171
2172 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2173 report_fatal_error("X86 interrupts may not return any value");
2174
2175 SmallVector<CCValAssign, 16> RVLocs;
2176 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2177 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2178
2179 SDValue Flag;
2180 SmallVector<SDValue, 6> RetOps;
2181 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2182 // Operand #1 = Bytes To Pop
2183 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2184 MVT::i32));
2185
2186 // Copy the result values into the output registers.
2187 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2188 ++I, ++OutsIndex) {
2189 CCValAssign &VA = RVLocs[I];
2190 assert(VA.isRegLoc() && "Can only return in registers!")((VA.isRegLoc() && "Can only return in registers!") ?
static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && \"Can only return in registers!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2190, __PRETTY_FUNCTION__))
;
2191
2192 // Add the register to the CalleeSaveDisableRegs list.
2193 if (ShouldDisableCalleeSavedRegister)
2194 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2195
2196 SDValue ValToCopy = OutVals[OutsIndex];
2197 EVT ValVT = ValToCopy.getValueType();
2198
2199 // Promote values to the appropriate types.
2200 if (VA.getLocInfo() == CCValAssign::SExt)
2201 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2202 else if (VA.getLocInfo() == CCValAssign::ZExt)
2203 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2204 else if (VA.getLocInfo() == CCValAssign::AExt) {
2205 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2206 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2207 else
2208 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2209 }
2210 else if (VA.getLocInfo() == CCValAssign::BCvt)
2211 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2212
2213 assert(VA.getLocInfo() != CCValAssign::FPExt &&((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2214, __PRETTY_FUNCTION__))
2214 "Unexpected FP-extend for return value.")((VA.getLocInfo() != CCValAssign::FPExt && "Unexpected FP-extend for return value."
) ? static_cast<void> (0) : __assert_fail ("VA.getLocInfo() != CCValAssign::FPExt && \"Unexpected FP-extend for return value.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2214, __PRETTY_FUNCTION__))
;
2215
2216 // If this is x86-64, and we disabled SSE, we can't return FP values,
2217 // or SSE or MMX vectors.
2218 if ((ValVT == MVT::f32 || ValVT == MVT::f64 ||
2219 VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) &&
2220 (Subtarget.is64Bit() && !Subtarget.hasSSE1())) {
2221 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2222 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2223 } else if (ValVT == MVT::f64 &&
2224 (Subtarget.is64Bit() && !Subtarget.hasSSE2())) {
2225 // Likewise we can't return F64 values with SSE1 only. gcc does so, but
2226 // llvm-gcc has never done it right and no one has noticed, so this
2227 // should be OK for now.
2228 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2229 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2230 }
2231
2232 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2233 // the RET instruction and handled by the FP Stackifier.
2234 if (VA.getLocReg() == X86::FP0 ||
2235 VA.getLocReg() == X86::FP1) {
2236 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2237 // change the value to the FP stack register class.
2238 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2239 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2240 RetOps.push_back(ValToCopy);
2241 // Don't emit a copytoreg.
2242 continue;
2243 }
2244
2245 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2246 // which is returned in RAX / RDX.
2247 if (Subtarget.is64Bit()) {
2248 if (ValVT == MVT::x86mmx) {
2249 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2250 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2251 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2252 ValToCopy);
2253 // If we don't have SSE2 available, convert to v4f32 so the generated
2254 // register is legal.
2255 if (!Subtarget.hasSSE2())
2256 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2257 }
2258 }
2259 }
2260
2261 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
2262
2263 if (VA.needsCustom()) {
2264 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2265, __PRETTY_FUNCTION__))
2265 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2265, __PRETTY_FUNCTION__))
;
2266
2267 Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
2268 Subtarget);
2269
2270 assert(2 == RegsToPass.size() &&((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2271, __PRETTY_FUNCTION__))
2271 "Expecting two registers after Pass64BitArgInRegs")((2 == RegsToPass.size() && "Expecting two registers after Pass64BitArgInRegs"
) ? static_cast<void> (0) : __assert_fail ("2 == RegsToPass.size() && \"Expecting two registers after Pass64BitArgInRegs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2271, __PRETTY_FUNCTION__))
;
2272
2273 // Add the second register to the CalleeSaveDisableRegs list.
2274 if (ShouldDisableCalleeSavedRegister)
2275 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2276 } else {
2277 RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2278 }
2279
2280 // Add nodes to the DAG and add the values into the RetOps list
2281 for (auto &Reg : RegsToPass) {
2282 Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
2283 Flag = Chain.getValue(1);
2284 RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
2285 }
2286 }
2287
2288 // Swift calling convention does not require we copy the sret argument
2289 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2290
2291 // All x86 ABIs require that for returning structs by value we copy
2292 // the sret argument into %rax/%eax (depending on ABI) for the return.
2293 // We saved the argument into a virtual register in the entry block,
2294 // so now we copy the value out and into %rax/%eax.
2295 //
2296 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2297 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2298 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2299 // either case FuncInfo->setSRetReturnReg() will have been called.
2300 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
2301 // When we have both sret and another return value, we should use the
2302 // original Chain stored in RetOps[0], instead of the current Chain updated
2303 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2304
2305 // For the case of sret and another return value, we have
2306 // Chain_0 at the function entry
2307 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2308 // If we use Chain_1 in getCopyFromReg, we will have
2309 // Val = getCopyFromReg(Chain_1)
2310 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2311
2312 // getCopyToReg(Chain_0) will be glued together with
2313 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2314 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2315 // Data dependency from Unit B to Unit A due to usage of Val in
2316 // getCopyToReg(Chain_1, Val)
2317 // Chain dependency from Unit A to Unit B
2318
2319 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2320 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2321 getPointerTy(MF.getDataLayout()));
2322
2323 unsigned RetValReg
2324 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2325 X86::RAX : X86::EAX;
2326 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2327 Flag = Chain.getValue(1);
2328
2329 // RAX/EAX now acts like a return value.
2330 RetOps.push_back(
2331 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2332
2333 // Add the returned register to the CalleeSaveDisableRegs list.
2334 if (ShouldDisableCalleeSavedRegister)
2335 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2336 }
2337
2338 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2339 const MCPhysReg *I =
2340 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2341 if (I) {
2342 for (; *I; ++I) {
2343 if (X86::GR64RegClass.contains(*I))
2344 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2345 else
2346 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2346)
;
2347 }
2348 }
2349
2350 RetOps[0] = Chain; // Update chain.
2351
2352 // Add the flag if we have it.
2353 if (Flag.getNode())
2354 RetOps.push_back(Flag);
2355
2356 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2357 if (CallConv == CallingConv::X86_INTR)
2358 opcode = X86ISD::IRET;
2359 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2360}
2361
2362bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2363 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2364 return false;
2365
2366 SDValue TCChain = Chain;
2367 SDNode *Copy = *N->use_begin();
2368 if (Copy->getOpcode() == ISD::CopyToReg) {
2369 // If the copy has a glue operand, we conservatively assume it isn't safe to
2370 // perform a tail call.
2371 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2372 return false;
2373 TCChain = Copy->getOperand(0);
2374 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2375 return false;
2376
2377 bool HasRet = false;
2378 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2379 UI != UE; ++UI) {
2380 if (UI->getOpcode() != X86ISD::RET_FLAG)
2381 return false;
2382 // If we are returning more than one value, we can definitely
2383 // not make a tail call see PR19530
2384 if (UI->getNumOperands() > 4)
2385 return false;
2386 if (UI->getNumOperands() == 4 &&
2387 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2388 return false;
2389 HasRet = true;
2390 }
2391
2392 if (!HasRet)
2393 return false;
2394
2395 Chain = TCChain;
2396 return true;
2397}
2398
2399EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2400 ISD::NodeType ExtendKind) const {
2401 MVT ReturnMVT = MVT::i32;
2402
2403 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2404 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2405 // The ABI does not require i1, i8 or i16 to be extended.
2406 //
2407 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2408 // always extending i8/i16 return values, so keep doing that for now.
2409 // (PR26665).
2410 ReturnMVT = MVT::i8;
2411 }
2412
2413 EVT MinVT = getRegisterType(Context, ReturnMVT);
2414 return VT.bitsLT(MinVT) ? MinVT : VT;
2415}
2416
2417/// Reads two 32 bit registers and creates a 64 bit mask value.
2418/// \param VA The current 32 bit value that need to be assigned.
2419/// \param NextVA The next 32 bit value that need to be assigned.
2420/// \param Root The parent DAG node.
2421/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2422/// glue purposes. In the case the DAG is already using
2423/// physical register instead of virtual, we should glue
2424/// our new SDValue to InFlag SDvalue.
2425/// \return a new SDvalue of size 64bit.
2426static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2427 SDValue &Root, SelectionDAG &DAG,
2428 const SDLoc &Dl, const X86Subtarget &Subtarget,
2429 SDValue *InFlag = nullptr) {
2430 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")(((Subtarget.hasBWI()) && "Expected AVX512BW target!"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasBWI()) && \"Expected AVX512BW target!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2430, __PRETTY_FUNCTION__))
;
2431 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((Subtarget.is32Bit() && "Expecting 32 bit target") ?
static_cast<void> (0) : __assert_fail ("Subtarget.is32Bit() && \"Expecting 32 bit target\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2431, __PRETTY_FUNCTION__))
;
2432 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2433, __PRETTY_FUNCTION__))
2433 "Expecting first location of 64 bit width type")((VA.getValVT() == MVT::v64i1 && "Expecting first location of 64 bit width type"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Expecting first location of 64 bit width type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2433, __PRETTY_FUNCTION__))
;
2434 assert(NextVA.getValVT() == VA.getValVT() &&((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2435, __PRETTY_FUNCTION__))
2435 "The locations should have the same type")((NextVA.getValVT() == VA.getValVT() && "The locations should have the same type"
) ? static_cast<void> (0) : __assert_fail ("NextVA.getValVT() == VA.getValVT() && \"The locations should have the same type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2435, __PRETTY_FUNCTION__))
;
2436 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2437, __PRETTY_FUNCTION__))
2437 "The values should reside in two registers")((VA.isRegLoc() && NextVA.isRegLoc() && "The values should reside in two registers"
) ? static_cast<void> (0) : __assert_fail ("VA.isRegLoc() && NextVA.isRegLoc() && \"The values should reside in two registers\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2437, __PRETTY_FUNCTION__))
;
2438
2439 SDValue Lo, Hi;
2440 unsigned Reg;
2441 SDValue ArgValueLo, ArgValueHi;
2442
2443 MachineFunction &MF = DAG.getMachineFunction();
2444 const TargetRegisterClass *RC = &X86::GR32RegClass;
2445
2446 // Read a 32 bit value from the registers
2447 if (nullptr == InFlag) {
2448 // When no physical register is present,
2449 // create an intermediate virtual register
2450 Reg = MF.addLiveIn(VA.getLocReg(), RC);
2451 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2452 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2453 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2454 } else {
2455 // When a physical register is available read the value from it and glue
2456 // the reads together.
2457 ArgValueLo =
2458 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2459 *InFlag = ArgValueLo.getValue(2);
2460 ArgValueHi =
2461 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2462 *InFlag = ArgValueHi.getValue(2);
2463 }
2464
2465 // Convert the i32 type into v32i1 type
2466 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2467
2468 // Convert the i32 type into v32i1 type
2469 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2470
2471 // Concatenate the two values together
2472 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2473}
2474
2475/// The function will lower a register of various sizes (8/16/32/64)
2476/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2477/// \returns a DAG node contains the operand after lowering to mask type.
2478static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
2479 const EVT &ValLoc, const SDLoc &Dl,
2480 SelectionDAG &DAG) {
2481 SDValue ValReturned = ValArg;
2482
2483 if (ValVT == MVT::v64i1) {
2484 // In 32 bit machine, this case is handled by getv64i1Argument
2485 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((ValLoc == MVT::i64 && "Expecting only i64 locations"
) ? static_cast<void> (0) : __assert_fail ("ValLoc == MVT::i64 && \"Expecting only i64 locations\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2485, __PRETTY_FUNCTION__))
;
2486 // In 64 bit machine, There is no need to truncate the value only bitcast
2487 } else {
2488 MVT maskLen;
2489 switch (ValVT.getSimpleVT().SimpleTy) {
2490 case MVT::v8i1:
2491 maskLen = MVT::i8;
2492 break;
2493 case MVT::v16i1:
2494 maskLen = MVT::i16;
2495 break;
2496 case MVT::v32i1:
2497 maskLen = MVT::i32;
2498 break;
2499 default:
2500 llvm_unreachable("Expecting a vector of i1 types")::llvm::llvm_unreachable_internal("Expecting a vector of i1 types"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2500)
;
2501 }
2502
2503 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
2504 }
2505
2506 return DAG.getBitcast(ValVT, ValReturned);
2507}
2508
2509/// Lower the result values of a call into the
2510/// appropriate copies out of appropriate physical registers.
2511///
2512SDValue X86TargetLowering::LowerCallResult(
2513 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
2514 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2515 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
2516 uint32_t *RegMask) const {
2517
2518 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2519 // Assign locations to each value returned by this call.
2520 SmallVector<CCValAssign, 16> RVLocs;
2521 bool Is64Bit = Subtarget.is64Bit();
2522 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2523 *DAG.getContext());
2524 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2525
2526 // Copy all of the result registers out of their specified physreg.
2527 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
2528 ++I, ++InsIndex) {
2529 CCValAssign &VA = RVLocs[I];
2530 EVT CopyVT = VA.getLocVT();
2531
2532 // In some calling conventions we need to remove the used registers
2533 // from the register mask.
2534 if (RegMask) {
2535 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
2536 SubRegs.isValid(); ++SubRegs)
2537 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
2538 }
2539
2540 // If this is x86-64, and we disabled SSE, we can't return FP values
2541 if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
2542 ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
2543 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2544 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2545 }
2546
2547 // If we prefer to use the value in xmm registers, copy it out as f80 and
2548 // use a truncate to move it from fp stack reg to xmm reg.
2549 bool RoundAfterCopy = false;
2550 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
2551 isScalarFPTypeInSSEReg(VA.getValVT())) {
2552 if (!Subtarget.hasX87())
2553 report_fatal_error("X87 register return with X87 disabled");
2554 CopyVT = MVT::f80;
2555 RoundAfterCopy = (CopyVT != VA.getLocVT());
2556 }
2557
2558 SDValue Val;
2559 if (VA.needsCustom()) {
2560 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2561, __PRETTY_FUNCTION__))
2561 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2561, __PRETTY_FUNCTION__))
;
2562 Val =
2563 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
2564 } else {
2565 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
2566 .getValue(1);
2567 Val = Chain.getValue(0);
2568 InFlag = Chain.getValue(2);
2569 }
2570
2571 if (RoundAfterCopy)
2572 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
2573 // This truncation won't change the value.
2574 DAG.getIntPtrConstant(1, dl));
2575
2576 if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
2577 if (VA.getValVT().isVector() &&
2578 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2579 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
2580 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
2581 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
2582 } else
2583 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
2584 }
2585
2586 InVals.push_back(Val);
2587 }
2588
2589 return Chain;
2590}
2591
2592//===----------------------------------------------------------------------===//
2593// C & StdCall & Fast Calling Convention implementation
2594//===----------------------------------------------------------------------===//
2595// The StdCall calling convention is used by many Windows API routines.
2596// It differs from the C calling convention only slightly: the callee,
2597// not the caller, cleans up the stack. Symbols are also decorated in a
2598// special way. It doesn't support any vector arguments.
2599// For info on the fast calling convention, see the Fast Calling Convention
2600// (tail call) implementation, LowerX86_32FastCCCallTo.
2601
/// Classification of struct-return (sret) usage, as produced by
/// callIsStructReturn and argsAreStructReturn.
enum StructReturnType {
  /// There are no arguments, or the first argument is not flagged sret.
  NotStructReturn,
  /// The first argument is sret and is either flagged inreg or the target is
  /// MCU.
  RegStructReturn,
  /// The first argument is sret and neither of the above applies.
  StackStructReturn
};
2609static StructReturnType
2610callIsStructReturn(const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsMCU) {
2611 if (Outs.empty())
2612 return NotStructReturn;
2613
2614 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
2615 if (!Flags.isSRet())
2616 return NotStructReturn;
2617 if (Flags.isInReg() || IsMCU)
2618 return RegStructReturn;
2619 return StackStructReturn;
2620}
2621
2622/// Determines whether a function uses struct return semantics.
2623static StructReturnType
2624argsAreStructReturn(const SmallVectorImpl<ISD::InputArg> &Ins, bool IsMCU) {
2625 if (Ins.empty())
2626 return NotStructReturn;
2627
2628 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
2629 if (!Flags.isSRet())
2630 return NotStructReturn;
2631 if (Flags.isInReg() || IsMCU)
2632 return RegStructReturn;
2633 return StackStructReturn;
2634}
2635
2636/// Make a copy of an aggregate at address specified by "Src" to address
2637/// "Dst" with size and alignment information specified by the specific
2638/// parameter attribute. The copy will be passed as a byval function parameter.
2639static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
2640 SDValue Chain, ISD::ArgFlagsTy Flags,
2641 SelectionDAG &DAG, const SDLoc &dl) {
2642 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
2643
2644 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
2645 /*isVolatile*/false, /*AlwaysInline=*/true,
2646 /*isTailCall*/false,
2647 MachinePointerInfo(), MachinePointerInfo());
2648}
2649
2650/// Return true if the calling convention is one that we can guarantee TCO for.
2651static bool canGuaranteeTCO(CallingConv::ID CC) {
2652 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
2653 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
2654 CC == CallingConv::HHVM);
2655}
2656
2657/// Return true if we might ever do TCO for calls with this calling convention.
2658static bool mayTailCallThisCC(CallingConv::ID CC) {
2659 switch (CC) {
2660 // C calling conventions:
2661 case CallingConv::C:
2662 case CallingConv::X86_64_Win64:
2663 case CallingConv::X86_64_SysV:
2664 // Callee pop conventions:
2665 case CallingConv::X86_ThisCall:
2666 case CallingConv::X86_StdCall:
2667 case CallingConv::X86_VectorCall:
2668 case CallingConv::X86_FastCall:
2669 return true;
2670 default:
2671 return canGuaranteeTCO(CC);
2672 }
2673}
2674
2675/// Return true if the function is being made into a tailcall target by
2676/// changing its ABI.
2677static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
2678 return GuaranteedTailCallOpt && canGuaranteeTCO(CC);
2679}
2680
2681bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2682 auto Attr =
2683 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2684 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2685 return false;
2686
2687 ImmutableCallSite CS(CI);
2688 CallingConv::ID CalleeCC = CS.getCallingConv();
2689 if (!mayTailCallThisCC(CalleeCC))
2690 return false;
2691
2692 return true;
2693}
2694
2695SDValue
2696X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
2697 const SmallVectorImpl<ISD::InputArg> &Ins,
2698 const SDLoc &dl, SelectionDAG &DAG,
2699 const CCValAssign &VA,
2700 MachineFrameInfo &MFI, unsigned i) const {
2701 // Create the nodes corresponding to a load from this parameter slot.
2702 ISD::ArgFlagsTy Flags = Ins[i].Flags;
2703 bool AlwaysUseMutable = shouldGuaranteeTCO(
2704 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
2705 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
2706 EVT ValVT;
2707 MVT PtrVT = getPointerTy(DAG.getDataLayout());
2708
2709 // If value is passed by pointer we have address passed instead of the value
2710 // itself. No need to extend if the mask value and location share the same
2711 // absolute size.
2712 bool ExtendedInMem =
2713 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
2714 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
2715
2716 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
2717 ValVT = VA.getLocVT();
2718 else
2719 ValVT = VA.getValVT();
2720
2721 // Calculate SP offset of interrupt parameter, re-arrange the slot normally
2722 // taken by a return address.
2723 int Offset = 0;
2724 if (CallConv == CallingConv::X86_INTR) {
2725 // X86 interrupts may take one or two arguments.
2726 // On the stack there will be no return address as in regular call.
2727 // Offset of last argument need to be set to -4/-8 bytes.
2728 // Where offset of the first argument out of two, should be set to 0 bytes.
2729 Offset = (Subtarget.is64Bit() ? 8 : 4) * ((i + 1) % Ins.size() - 1);
2730 if (Subtarget.is64Bit() && Ins.size() == 2) {
2731 // The stack pointer needs to be realigned for 64 bit handlers with error
2732 // code, so the argument offset changes by 8 bytes.
2733 Offset += 8;
2734 }
2735 }
2736
2737 // FIXME: For now, all byval parameter objects are marked mutable. This can be
2738 // changed with more analysis.
2739 // In case of tail call optimization mark all arguments mutable. Since they
2740 // could be overwritten by lowering of arguments in case of a tail call.
2741 if (Flags.isByVal()) {
2742 unsigned Bytes = Flags.getByValSize();
2743 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
2744 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
2745 // Adjust SP offset of interrupt parameter.
2746 if (CallConv == CallingConv::X86_INTR) {
2747 MFI.setObjectOffset(FI, Offset);
2748 }
2749 return DAG.getFrameIndex(FI, PtrVT);
2750 }
2751
2752 // This is an argument in memory. We might be able to perform copy elision.
2753 if (Flags.isCopyElisionCandidate()) {
2754 EVT ArgVT = Ins[i].ArgVT;
2755 SDValue PartAddr;
2756 if (Ins[i].PartOffset == 0) {
2757 // If this is a one-part value or the first part of a multi-part value,
2758 // create a stack object for the entire argument value type and return a
2759 // load from our portion of it. This assumes that if the first part of an
2760 // argument is in memory, the rest will also be in memory.
2761 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
2762 /*Immutable=*/false);
2763 PartAddr = DAG.getFrameIndex(FI, PtrVT);
2764 return DAG.getLoad(
2765 ValVT, dl, Chain, PartAddr,
2766 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2767 } else {
2768 // This is not the first piece of an argument in memory. See if there is
2769 // already a fixed stack object including this offset. If so, assume it
2770 // was created by the PartOffset == 0 branch above and create a load from
2771 // the appropriate offset into it.
2772 int64_t PartBegin = VA.getLocMemOffset();
2773 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
2774 int FI = MFI.getObjectIndexBegin();
2775 for (; MFI.isFixedObjectIndex(FI); ++FI) {
2776 int64_t ObjBegin = MFI.getObjectOffset(FI);
2777 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
2778 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
2779 break;
2780 }
2781 if (MFI.isFixedObjectIndex(FI)) {
2782 SDValue Addr =
2783 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
2784 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
2785 return DAG.getLoad(
2786 ValVT, dl, Chain, Addr,
2787 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
2788 Ins[i].PartOffset));
2789 }
2790 }
2791 }
2792
2793 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
2794 VA.getLocMemOffset(), isImmutable);
2795
2796 // Set SExt or ZExt flag.
2797 if (VA.getLocInfo() == CCValAssign::ZExt) {
2798 MFI.setObjectZExt(FI, true);
2799 } else if (VA.getLocInfo() == CCValAssign::SExt) {
2800 MFI.setObjectSExt(FI, true);
2801 }
2802
2803 // Adjust SP offset of interrupt parameter.
2804 if (CallConv == CallingConv::X86_INTR) {
2805 MFI.setObjectOffset(FI, Offset);
2806 }
2807
2808 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
2809 SDValue Val = DAG.getLoad(
2810 ValVT, dl, Chain, FIN,
2811 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2812 return ExtendedInMem ? DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)
2813 : Val;
2814}
2815
2816// FIXME: Get this from tablegen.
2817static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
2818 const X86Subtarget &Subtarget) {
2819 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2819, __PRETTY_FUNCTION__))
;
2820
2821 if (Subtarget.isCallingConvWin64(CallConv)) {
2822 static const MCPhysReg GPR64ArgRegsWin64[] = {
2823 X86::RCX, X86::RDX, X86::R8, X86::R9
2824 };
2825 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
2826 }
2827
2828 static const MCPhysReg GPR64ArgRegs64Bit[] = {
2829 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
2830 };
2831 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
2832}
2833
2834// FIXME: Get this from tablegen.
2835static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
2836 CallingConv::ID CallConv,
2837 const X86Subtarget &Subtarget) {
2838 assert(Subtarget.is64Bit())((Subtarget.is64Bit()) ? static_cast<void> (0) : __assert_fail
("Subtarget.is64Bit()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2838, __PRETTY_FUNCTION__))
;
2839 if (Subtarget.isCallingConvWin64(CallConv)) {
2840 // The XMM registers which might contain var arg parameters are shadowed
2841 // in their paired GPR. So we only need to save the GPR to their home
2842 // slots.
2843 // TODO: __vectorcall will change this.
2844 return None;
2845 }
2846
2847 const Function *Fn = MF.getFunction();
2848 bool NoImplicitFloatOps = Fn->hasFnAttribute(Attribute::NoImplicitFloat);
2849 bool isSoftFloat = Subtarget.useSoftFloat();
2850 assert(!(isSoftFloat && NoImplicitFloatOps) &&((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2851, __PRETTY_FUNCTION__))
2851 "SSE register cannot be used when SSE is disabled!")((!(isSoftFloat && NoImplicitFloatOps) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(isSoftFloat && NoImplicitFloatOps) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2851, __PRETTY_FUNCTION__))
;
2852 if (isSoftFloat || NoImplicitFloatOps || !Subtarget.hasSSE1())
2853 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
2854 // registers.
2855 return None;
2856
2857 static const MCPhysReg XMMArgRegs64Bit[] = {
2858 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2859 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2860 };
2861 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
2862}
2863
2864#ifndef NDEBUG
2865static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
2866 return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
2867 [](const CCValAssign &A, const CCValAssign &B) -> bool {
2868 return A.getValNo() < B.getValNo();
2869 });
2870}
2871#endif
2872
2873SDValue X86TargetLowering::LowerFormalArguments(
2874 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2875 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
2876 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2877 MachineFunction &MF = DAG.getMachineFunction();
2878 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2879 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
2880
2881 const Function *Fn = MF.getFunction();
2882 if (Fn->hasExternalLinkage() &&
2883 Subtarget.isTargetCygMing() &&
2884 Fn->getName() == "main")
2885 FuncInfo->setForceFramePointer(true);
2886
2887 MachineFrameInfo &MFI = MF.getFrameInfo();
2888 bool Is64Bit = Subtarget.is64Bit();
2889 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2890
2891 assert(((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2893, __PRETTY_FUNCTION__))
2892 !(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2893, __PRETTY_FUNCTION__))
2893 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling conv' regcall, fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling conv' regcall, fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2893, __PRETTY_FUNCTION__))
;
2894
2895 if (CallConv == CallingConv::X86_INTR) {
2896 bool isLegal = Ins.size() == 1 ||
2897 (Ins.size() == 2 && ((Is64Bit && Ins[1].VT == MVT::i64) ||
2898 (!Is64Bit && Ins[1].VT == MVT::i32)));
2899 if (!isLegal)
2900 report_fatal_error("X86 interrupts may take one or two arguments");
2901 }
2902
2903 // Assign locations to all of the incoming arguments.
2904 SmallVector<CCValAssign, 16> ArgLocs;
2905 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2906
2907 // Allocate shadow area for Win64.
2908 if (IsWin64)
2909 CCInfo.AllocateStack(32, 8);
2910
2911 CCInfo.AnalyzeArguments(Ins, CC_X86);
2912
2913 // In vectorcall calling convention a second pass is required for the HVA
2914 // types.
2915 if (CallingConv::X86_VectorCall == CallConv) {
2916 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
2917 }
2918
2919 // The next loop assumes that the locations are in the same order of the
2920 // input arguments.
2921 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2922, __PRETTY_FUNCTION__))
2922 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2922, __PRETTY_FUNCTION__))
;
2923
2924 SDValue ArgValue;
2925 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
2926 ++I, ++InsIndex) {
2927 assert(InsIndex < Ins.size() && "Invalid Ins index")((InsIndex < Ins.size() && "Invalid Ins index") ? static_cast
<void> (0) : __assert_fail ("InsIndex < Ins.size() && \"Invalid Ins index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2927, __PRETTY_FUNCTION__))
;
2928 CCValAssign &VA = ArgLocs[I];
2929
2930 if (VA.isRegLoc()) {
2931 EVT RegVT = VA.getLocVT();
2932 if (VA.needsCustom()) {
2933 assert(((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2935, __PRETTY_FUNCTION__))
2934 VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2935, __PRETTY_FUNCTION__))
2935 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2935, __PRETTY_FUNCTION__))
;
2936
2937 // v64i1 values, in regcall calling convention, that are
2938 // compiled to 32 bit arch, are split up into two registers.
2939 ArgValue =
2940 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
2941 } else {
2942 const TargetRegisterClass *RC;
2943 if (RegVT == MVT::i32)
2944 RC = &X86::GR32RegClass;
2945 else if (Is64Bit && RegVT == MVT::i64)
2946 RC = &X86::GR64RegClass;
2947 else if (RegVT == MVT::f32)
2948 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
2949 else if (RegVT == MVT::f64)
2950 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
2951 else if (RegVT == MVT::f80)
2952 RC = &X86::RFP80RegClass;
2953 else if (RegVT == MVT::f128)
2954 RC = &X86::FR128RegClass;
2955 else if (RegVT.is512BitVector())
2956 RC = &X86::VR512RegClass;
2957 else if (RegVT.is256BitVector())
2958 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
2959 else if (RegVT.is128BitVector())
2960 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
2961 else if (RegVT == MVT::x86mmx)
2962 RC = &X86::VR64RegClass;
2963 else if (RegVT == MVT::i1)
2964 RC = &X86::VK1RegClass;
2965 else if (RegVT == MVT::v8i1)
2966 RC = &X86::VK8RegClass;
2967 else if (RegVT == MVT::v16i1)
2968 RC = &X86::VK16RegClass;
2969 else if (RegVT == MVT::v32i1)
2970 RC = &X86::VK32RegClass;
2971 else if (RegVT == MVT::v64i1)
2972 RC = &X86::VK64RegClass;
2973 else
2974 llvm_unreachable("Unknown argument type!")::llvm::llvm_unreachable_internal("Unknown argument type!", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 2974)
;
2975
2976 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2977 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
2978 }
2979
2980 // If this is an 8 or 16-bit value, it is really passed promoted to 32
2981 // bits. Insert an assert[sz]ext to capture this, then truncate to the
2982 // right size.
2983 if (VA.getLocInfo() == CCValAssign::SExt)
2984 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
2985 DAG.getValueType(VA.getValVT()));
2986 else if (VA.getLocInfo() == CCValAssign::ZExt)
2987 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
2988 DAG.getValueType(VA.getValVT()));
2989 else if (VA.getLocInfo() == CCValAssign::BCvt)
2990 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
2991
2992 if (VA.isExtInLoc()) {
2993 // Handle MMX values passed in XMM regs.
2994 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
2995 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
2996 else if (VA.getValVT().isVector() &&
2997 VA.getValVT().getScalarType() == MVT::i1 &&
2998 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
2999 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3000 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3001 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3002 } else
3003 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3004 }
3005 } else {
3006 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3006, __PRETTY_FUNCTION__))
;
3007 ArgValue =
3008 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3009 }
3010
3011 // If value is passed via pointer - do a load.
3012 if (VA.getLocInfo() == CCValAssign::Indirect)
3013 ArgValue =
3014 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3015
3016 InVals.push_back(ArgValue);
3017 }
3018
3019 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3020 // Swift calling convention does not require we copy the sret argument
3021 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3022 if (CallConv == CallingConv::Swift)
3023 continue;
3024
3025 // All x86 ABIs require that for returning structs by value we copy the
3026 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3027 // the argument into a virtual register so that we can access it from the
3028 // return points.
3029 if (Ins[I].Flags.isSRet()) {
3030 unsigned Reg = FuncInfo->getSRetReturnReg();
3031 if (!Reg) {
3032 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3033 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3034 FuncInfo->setSRetReturnReg(Reg);
3035 }
3036 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3037 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3038 break;
3039 }
3040 }
3041
3042 unsigned StackSize = CCInfo.getNextStackOffset();
3043 // Align stack specially for tail calls.
3044 if (shouldGuaranteeTCO(CallConv,
3045 MF.getTarget().Options.GuaranteedTailCallOpt))
3046 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3047
3048 // If the function takes variable number of arguments, make a frame index for
3049 // the start of the first vararg value... for expansion of llvm.va_start. We
3050 // can skip this if there are no va_start calls.
3051 if (MFI.hasVAStart() &&
3052 (Is64Bit || (CallConv != CallingConv::X86_FastCall &&
3053 CallConv != CallingConv::X86_ThisCall))) {
3054 FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
3055 }
3056
3057 // Figure out if XMM registers are in use.
3058 assert(!(Subtarget.useSoftFloat() &&((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3060, __PRETTY_FUNCTION__))
3059 Fn->hasFnAttribute(Attribute::NoImplicitFloat)) &&((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3060, __PRETTY_FUNCTION__))
3060 "SSE register cannot be used when SSE is disabled!")((!(Subtarget.useSoftFloat() && Fn->hasFnAttribute
(Attribute::NoImplicitFloat)) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(Subtarget.useSoftFloat() && Fn->hasFnAttribute(Attribute::NoImplicitFloat)) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3060, __PRETTY_FUNCTION__))
;
3061
3062 // 64-bit calling conventions support varargs and register parameters, so we
3063 // have to do extra work to spill them in the prologue.
3064 if (Is64Bit && isVarArg && MFI.hasVAStart()) {
3065 // Find the first unallocated argument registers.
3066 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3067 ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
3068 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3069 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3070 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3071, __PRETTY_FUNCTION__))
3071 "SSE register cannot be used when SSE is disabled!")((!(NumXMMRegs && !Subtarget.hasSSE1()) && "SSE register cannot be used when SSE is disabled!"
) ? static_cast<void> (0) : __assert_fail ("!(NumXMMRegs && !Subtarget.hasSSE1()) && \"SSE register cannot be used when SSE is disabled!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3071, __PRETTY_FUNCTION__))
;
3072
3073 // Gather all the live in physical registers.
3074 SmallVector<SDValue, 6> LiveGPRs;
3075 SmallVector<SDValue, 8> LiveXMMRegs;
3076 SDValue ALVal;
3077 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3078 unsigned GPR = MF.addLiveIn(Reg, &X86::GR64RegClass);
3079 LiveGPRs.push_back(
3080 DAG.getCopyFromReg(Chain, dl, GPR, MVT::i64));
3081 }
3082 if (!ArgXMMs.empty()) {
3083 unsigned AL = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3084 ALVal = DAG.getCopyFromReg(Chain, dl, AL, MVT::i8);
3085 for (MCPhysReg Reg : ArgXMMs.slice(NumXMMRegs)) {
3086 unsigned XMMReg = MF.addLiveIn(Reg, &X86::VR128RegClass);
3087 LiveXMMRegs.push_back(
3088 DAG.getCopyFromReg(Chain, dl, XMMReg, MVT::v4f32));
3089 }
3090 }
3091
3092 if (IsWin64) {
3093 // Get to the caller-allocated home save location. Add 8 to account
3094 // for the return address.
3095 int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
3096 FuncInfo->setRegSaveFrameIndex(
3097 MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3098 // Fixup to set vararg frame on shadow area (4 x i64).
3099 if (NumIntRegs < 4)
3100 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3101 } else {
3102 // For X86-64, if there are vararg parameters that are passed via
3103 // registers, then we must store them to their spots on the stack so
3104 // they may be loaded by dereferencing the result of va_next.
3105 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3106 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3107 FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
3108 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
3109 }
3110
3111 // Store the integer parameter registers.
3112 SmallVector<SDValue, 8> MemOps;
3113 SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3114 getPointerTy(DAG.getDataLayout()));
3115 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3116 for (SDValue Val : LiveGPRs) {
3117 SDValue FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3118 RSFIN, DAG.getIntPtrConstant(Offset, dl));
3119 SDValue Store =
3120 DAG.getStore(Val.getValue(1), dl, Val, FIN,
3121 MachinePointerInfo::getFixedStack(
3122 DAG.getMachineFunction(),
3123 FuncInfo->getRegSaveFrameIndex(), Offset));
3124 MemOps.push_back(Store);
3125 Offset += 8;
3126 }
3127
3128 if (!ArgXMMs.empty() && NumXMMRegs != ArgXMMs.size()) {
3129 // Now store the XMM (fp + vector) parameter registers.
3130 SmallVector<SDValue, 12> SaveXMMOps;
3131 SaveXMMOps.push_back(Chain);
3132 SaveXMMOps.push_back(ALVal);
3133 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3134 FuncInfo->getRegSaveFrameIndex(), dl));
3135 SaveXMMOps.push_back(DAG.getIntPtrConstant(
3136 FuncInfo->getVarArgsFPOffset(), dl));
3137 SaveXMMOps.insert(SaveXMMOps.end(), LiveXMMRegs.begin(),
3138 LiveXMMRegs.end());
3139 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, dl,
3140 MVT::Other, SaveXMMOps));
3141 }
3142
3143 if (!MemOps.empty())
3144 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3145 }
3146
3147 if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
3148 // Find the largest legal vector type.
3149 MVT VecVT = MVT::Other;
3150 // FIXME: Only some x86_32 calling conventions support AVX512.
3151 if (Subtarget.hasAVX512() &&
3152 (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
3153 CallConv == CallingConv::Intel_OCL_BI)))
3154 VecVT = MVT::v16f32;
3155 else if (Subtarget.hasAVX())
3156 VecVT = MVT::v8f32;
3157 else if (Subtarget.hasSSE2())
3158 VecVT = MVT::v4f32;
3159
3160 // We forward some GPRs and some vector types.
3161 SmallVector<MVT, 2> RegParmTypes;
3162 MVT IntVT = Is64Bit ? MVT::i64 : MVT::i32;
3163 RegParmTypes.push_back(IntVT);
3164 if (VecVT != MVT::Other)
3165 RegParmTypes.push_back(VecVT);
3166
3167 // Compute the set of forwarded registers. The rest are scratch.
3168 SmallVectorImpl<ForwardedRegister> &Forwards =
3169 FuncInfo->getForwardedMustTailRegParms();
3170 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3171
3172 // Conservatively forward AL on x86_64, since it might be used for varargs.
3173 if (Is64Bit && !CCInfo.isAllocated(X86::AL)) {
3174 unsigned ALVReg = MF.addLiveIn(X86::AL, &X86::GR8RegClass);
3175 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3176 }
3177
3178 // Copy all forwards from physical to virtual registers.
3179 for (ForwardedRegister &F : Forwards) {
3180 // FIXME: Can we use a less constrained schedule?
3181 SDValue RegVal = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3182 F.VReg = MF.getRegInfo().createVirtualRegister(getRegClassFor(F.VT));
3183 Chain = DAG.getCopyToReg(Chain, dl, F.VReg, RegVal);
3184 }
3185 }
3186
3187 // Some CCs need callee pop.
3188 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3189 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3190 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3191 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3192 // X86 interrupts must pop the error code (and the alignment padding) if
3193 // present.
3194 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3195 } else {
3196 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3197 // If this is an sret function, the return should pop the hidden pointer.
3198 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3199 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3200 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3201 FuncInfo->setBytesToPopOnReturn(4);
3202 }
3203
3204 if (!Is64Bit) {
3205 // RegSaveFrameIndex is X86-64 only.
3206 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3207 if (CallConv == CallingConv::X86_FastCall ||
3208 CallConv == CallingConv::X86_ThisCall)
3209 // fastcc functions can't have varargs.
3210 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3211 }
3212
3213 FuncInfo->setArgumentStackSize(StackSize);
3214
3215 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3216 EHPersonality Personality = classifyEHPersonality(Fn->getPersonalityFn());
3217 if (Personality == EHPersonality::CoreCLR) {
3218 assert(Is64Bit)((Is64Bit) ? static_cast<void> (0) : __assert_fail ("Is64Bit"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3218, __PRETTY_FUNCTION__))
;
3219 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3220 // that we'd prefer this slot be allocated towards the bottom of the frame
3221 // (i.e. near the stack pointer after allocating the frame). Every
3222 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3223 // offset from the bottom of this and each funclet's frame must be the
3224 // same, so the size of funclets' (mostly empty) frames is dictated by
3225 // how far this slot is from the bottom (since they allocate just enough
3226 // space to accommodate holding this slot at the correct offset).
3227 int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
3228 EHInfo->PSPSymFrameIdx = PSPSymFI;
3229 }
3230 }
3231
3232 if (CallConv == CallingConv::X86_RegCall ||
3233 Fn->hasFnAttribute("no_caller_saved_registers")) {
3234 const MachineRegisterInfo &MRI = MF.getRegInfo();
3235 for (const auto &Pair : make_range(MRI.livein_begin(), MRI.livein_end()))
3236 MF.getRegInfo().disableCalleeSavedRegister(Pair.first);
3237 }
3238
3239 return Chain;
3240}
3241
3242SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3243 SDValue Arg, const SDLoc &dl,
3244 SelectionDAG &DAG,
3245 const CCValAssign &VA,
3246 ISD::ArgFlagsTy Flags) const {
3247 unsigned LocMemOffset = VA.getLocMemOffset();
3248 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3249 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3250 StackPtr, PtrOff);
3251 if (Flags.isByVal())
3252 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3253
3254 return DAG.getStore(
3255 Chain, dl, Arg, PtrOff,
3256 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3257}
3258
3259/// Emit a load of return address if tail call
3260/// optimization is performed and it is required.
3261SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3262 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3263 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3264 // Adjust the Return address stack slot.
3265 EVT VT = getPointerTy(DAG.getDataLayout());
3266 OutRetAddr = getReturnAddressFrameIndex(DAG);
3267
3268 // Load the "old" Return address.
3269 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3270 return SDValue(OutRetAddr.getNode(), 1);
3271}
3272
3273/// Emit a store of the return address if tail call
3274/// optimization is performed and it is required (FPDiff!=0).
3275static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3276 SDValue Chain, SDValue RetAddrFrIdx,
3277 EVT PtrVT, unsigned SlotSize,
3278 int FPDiff, const SDLoc &dl) {
3279 // Store the return address to the appropriate stack slot.
3280 if (!FPDiff) return Chain;
3281 // Calculate the new stack slot for the return address.
3282 int NewReturnAddrFI =
3283 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3284 false);
3285 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3286 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3287 MachinePointerInfo::getFixedStack(
3288 DAG.getMachineFunction(), NewReturnAddrFI));
3289 return Chain;
3290}
3291
3292/// Returns a vector_shuffle mask for an movs{s|d}, movd
3293/// operation of specified width.
3294static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3295 SDValue V2) {
3296 unsigned NumElems = VT.getVectorNumElements();
3297 SmallVector<int, 8> Mask;
3298 Mask.push_back(NumElems);
3299 for (unsigned i = 1; i != NumElems; ++i)
3300 Mask.push_back(i);
3301 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3302}
3303
3304SDValue
3305X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3306 SmallVectorImpl<SDValue> &InVals) const {
3307 SelectionDAG &DAG = CLI.DAG;
3308 SDLoc &dl = CLI.DL;
3309 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3310 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3311 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3312 SDValue Chain = CLI.Chain;
3313 SDValue Callee = CLI.Callee;
3314 CallingConv::ID CallConv = CLI.CallConv;
3315 bool &isTailCall = CLI.IsTailCall;
3316 bool isVarArg = CLI.IsVarArg;
3317
3318 MachineFunction &MF = DAG.getMachineFunction();
3319 bool Is64Bit = Subtarget.is64Bit();
3320 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3321 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3322 bool IsSibcall = false;
3323 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3324 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
3325 const CallInst *CI =
3326 CLI.CS ? dyn_cast<CallInst>(CLI.CS->getInstruction()) : nullptr;
3327 const Function *Fn = CI ? CI->getCalledFunction() : nullptr;
3328 bool HasNCSR = (CI && CI->hasFnAttr("no_caller_saved_registers")) ||
3329 (Fn && Fn->hasFnAttribute("no_caller_saved_registers"));
3330
3331 if (CallConv == CallingConv::X86_INTR)
3332 report_fatal_error("X86 interrupts may not be called directly");
3333
3334 if (Attr.getValueAsString() == "true")
3335 isTailCall = false;
3336
3337 if (Subtarget.isPICStyleGOT() &&
3338 !MF.getTarget().Options.GuaranteedTailCallOpt) {
3339 // If we are using a GOT, disable tail calls to external symbols with
3340 // default visibility. Tail calling such a symbol requires using a GOT
3341 // relocation, which forces early binding of the symbol. This breaks code
3342 // that require lazy function symbol resolution. Using musttail or
3343 // GuaranteedTailCallOpt will override this.
3344 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3345 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3346 G->getGlobal()->hasDefaultVisibility()))
3347 isTailCall = false;
3348 }
3349
3350 bool IsMustTail = CLI.CS && CLI.CS->isMustTailCall();
3351 if (IsMustTail) {
3352 // Force this to be a tail call. The verifier rules are enough to ensure
3353 // that we can lower this successfully without moving the return address
3354 // around.
3355 isTailCall = true;
3356 } else if (isTailCall) {
3357 // Check if it's really possible to do a tail call.
3358 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3359 isVarArg, SR != NotStructReturn,
3360 MF.getFunction()->hasStructRetAttr(), CLI.RetTy,
3361 Outs, OutVals, Ins, DAG);
3362
3363 // Sibcalls are automatically detected tailcalls which do not require
3364 // ABI changes.
3365 if (!MF.getTarget().Options.GuaranteedTailCallOpt && isTailCall)
3366 IsSibcall = true;
3367
3368 if (isTailCall)
3369 ++NumTailCalls;
3370 }
3371
3372 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3373, __PRETTY_FUNCTION__))
3373 "Var args not supported with calling convention fastcc, ghc or hipe")((!(isVarArg && canGuaranteeTCO(CallConv)) &&
"Var args not supported with calling convention fastcc, ghc or hipe"
) ? static_cast<void> (0) : __assert_fail ("!(isVarArg && canGuaranteeTCO(CallConv)) && \"Var args not supported with calling convention fastcc, ghc or hipe\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3373, __PRETTY_FUNCTION__))
;
3374
3375 // Analyze operands of the call, assigning locations to each operand.
3376 SmallVector<CCValAssign, 16> ArgLocs;
3377 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3378
3379 // Allocate shadow area for Win64.
3380 if (IsWin64)
3381 CCInfo.AllocateStack(32, 8);
3382
3383 CCInfo.AnalyzeArguments(Outs, CC_X86);
3384
3385 // In vectorcall calling convention a second pass is required for the HVA
3386 // types.
3387 if (CallingConv::X86_VectorCall == CallConv) {
3388 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3389 }
3390
3391 // Get a count of how many bytes are to be pushed on the stack.
3392 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3393 if (IsSibcall)
3394 // This is a sibcall. The memory operands are available in caller's
3395 // own caller's stack.
3396 NumBytes = 0;
3397 else if (MF.getTarget().Options.GuaranteedTailCallOpt &&
3398 canGuaranteeTCO(CallConv))
3399 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
3400
3401 int FPDiff = 0;
3402 if (isTailCall && !IsSibcall && !IsMustTail) {
3403 // Lower arguments at fp - stackoffset + fpdiff.
3404 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
3405
3406 FPDiff = NumBytesCallerPushed - NumBytes;
3407
3408 // Set the delta of movement of the returnaddr stackslot.
3409 // But only set if delta is greater than previous delta.
3410 if (FPDiff < X86Info->getTCReturnAddrDelta())
3411 X86Info->setTCReturnAddrDelta(FPDiff);
3412 }
3413
3414 unsigned NumBytesToPush = NumBytes;
3415 unsigned NumBytesToPop = NumBytes;
3416
3417 // If we have an inalloca argument, all stack space has already been allocated
3418 // for us and be right at the top of the stack. We don't support multiple
3419 // arguments passed in memory when using inalloca.
3420 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
3421 NumBytesToPush = 0;
3422 if (!ArgLocs.back().isMemLoc())
3423 report_fatal_error("cannot use inalloca attribute on a register "
3424 "parameter");
3425 if (ArgLocs.back().getLocMemOffset() != 0)
3426 report_fatal_error("any parameter with the inalloca attribute must be "
3427 "the only memory argument");
3428 }
3429
3430 if (!IsSibcall)
3431 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
3432 NumBytes - NumBytesToPush, dl);
3433
3434 SDValue RetAddrFrIdx;
3435 // Load return address for tail calls.
3436 if (isTailCall && FPDiff)
3437 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
3438 Is64Bit, FPDiff, dl);
3439
3440 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3441 SmallVector<SDValue, 8> MemOpChains;
3442 SDValue StackPtr;
3443
3444 // The next loop assumes that the locations are in the same order of the
3445 // input arguments.
3446 assert(isSortedByValueNo(ArgLocs) &&((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3447, __PRETTY_FUNCTION__))
3447 "Argument Location list must be sorted before lowering")((isSortedByValueNo(ArgLocs) && "Argument Location list must be sorted before lowering"
) ? static_cast<void> (0) : __assert_fail ("isSortedByValueNo(ArgLocs) && \"Argument Location list must be sorted before lowering\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3447, __PRETTY_FUNCTION__))
;
3448
3449 // Walk the register/memloc assignments, inserting copies/loads. In the case
3450 // of tail call optimization arguments are handle later.
3451 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3452 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
3453 ++I, ++OutIndex) {
3454 assert(OutIndex < Outs.size() && "Invalid Out index")((OutIndex < Outs.size() && "Invalid Out index") ?
static_cast<void> (0) : __assert_fail ("OutIndex < Outs.size() && \"Invalid Out index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3454, __PRETTY_FUNCTION__))
;
3455 // Skip inalloca arguments, they have already been written.
3456 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
3457 if (Flags.isInAlloca())
3458 continue;
3459
3460 CCValAssign &VA = ArgLocs[I];
3461 EVT RegVT = VA.getLocVT();
3462 SDValue Arg = OutVals[OutIndex];
3463 bool isByVal = Flags.isByVal();
3464
3465 // Promote the value if needed.
3466 switch (VA.getLocInfo()) {
3467 default: llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3467)
;
3468 case CCValAssign::Full: break;
3469 case CCValAssign::SExt:
3470 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
3471 break;
3472 case CCValAssign::ZExt:
3473 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
3474 break;
3475 case CCValAssign::AExt:
3476 if (Arg.getValueType().isVector() &&
3477 Arg.getValueType().getVectorElementType() == MVT::i1)
3478 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
3479 else if (RegVT.is128BitVector()) {
3480 // Special case: passing MMX values in XMM registers.
3481 Arg = DAG.getBitcast(MVT::i64, Arg);
3482 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
3483 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
3484 } else
3485 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
3486 break;
3487 case CCValAssign::BCvt:
3488 Arg = DAG.getBitcast(RegVT, Arg);
3489 break;
3490 case CCValAssign::Indirect: {
3491 // Store the argument.
3492 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
3493 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3494 Chain = DAG.getStore(
3495 Chain, dl, Arg, SpillSlot,
3496 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3497 Arg = SpillSlot;
3498 break;
3499 }
3500 }
3501
3502 if (VA.needsCustom()) {
3503 assert(VA.getValVT() == MVT::v64i1 &&((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3504, __PRETTY_FUNCTION__))
3504 "Currently the only custom case is when we split v64i1 to 2 regs")((VA.getValVT() == MVT::v64i1 && "Currently the only custom case is when we split v64i1 to 2 regs"
) ? static_cast<void> (0) : __assert_fail ("VA.getValVT() == MVT::v64i1 && \"Currently the only custom case is when we split v64i1 to 2 regs\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3504, __PRETTY_FUNCTION__))
;
3505 // Split v64i1 value into two registers
3506 Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
3507 Subtarget);
3508 } else if (VA.isRegLoc()) {
3509 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3510 if (isVarArg && IsWin64) {
3511 // Win64 ABI requires argument XMM reg to be copied to the corresponding
3512 // shadow reg if callee is a varargs function.
3513 unsigned ShadowReg = 0;
3514 switch (VA.getLocReg()) {
3515 case X86::XMM0: ShadowReg = X86::RCX; break;
3516 case X86::XMM1: ShadowReg = X86::RDX; break;
3517 case X86::XMM2: ShadowReg = X86::R8; break;
3518 case X86::XMM3: ShadowReg = X86::R9; break;
3519 }
3520 if (ShadowReg)
3521 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
3522 }
3523 } else if (!IsSibcall && (!isTailCall || isByVal)) {
3524 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3524, __PRETTY_FUNCTION__))
;
3525 if (!StackPtr.getNode())
3526 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3527 getPointerTy(DAG.getDataLayout()));
3528 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
3529 dl, DAG, VA, Flags));
3530 }
3531 }
3532
3533 if (!MemOpChains.empty())
3534 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
3535
3536 if (Subtarget.isPICStyleGOT()) {
3537 // ELF / PIC requires GOT in the EBX register before function calls via PLT
3538 // GOT pointer.
3539 if (!isTailCall) {
3540 RegsToPass.push_back(std::make_pair(
3541 unsigned(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
3542 getPointerTy(DAG.getDataLayout()))));
3543 } else {
3544 // If we are tail calling and generating PIC/GOT style code load the
3545 // address of the callee into ECX. The value in ecx is used as target of
3546 // the tail jump. This is done to circumvent the ebx/callee-saved problem
3547 // for tail calls on PIC/GOT architectures. Normally we would just put the
3548 // address of GOT into ebx and then call target@PLT. But for tail calls
3549 // ebx would be restored (since ebx is callee saved) before jumping to the
3550 // target@PLT.
3551
3552 // Note: The actual moving to ECX is done further down.
3553 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3554 if (G && !G->getGlobal()->hasLocalLinkage() &&
3555 G->getGlobal()->hasDefaultVisibility())
3556 Callee = LowerGlobalAddress(Callee, DAG);
3557 else if (isa<ExternalSymbolSDNode>(Callee))
3558 Callee = LowerExternalSymbol(Callee, DAG);
3559 }
3560 }
3561
3562 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
3563 // From AMD64 ABI document:
3564 // For calls that may call functions that use varargs or stdargs
3565 // (prototype-less calls or calls to functions containing ellipsis (...) in
3566 // the declaration) %al is used as hidden argument to specify the number
3567 // of SSE registers used. The contents of %al do not need to match exactly
3568 // the number of registers, but must be an ubound on the number of SSE
3569 // registers used and is in the range 0 - 8 inclusive.
3570
3571 // Count the number of XMM registers allocated.
3572 static const MCPhysReg XMMArgRegs[] = {
3573 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3574 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3575 };
3576 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
3577 assert((Subtarget.hasSSE1() || !NumXMMRegs)(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3578, __PRETTY_FUNCTION__))
3578 && "SSE registers cannot be used when SSE is disabled")(((Subtarget.hasSSE1() || !NumXMMRegs) && "SSE registers cannot be used when SSE is disabled"
) ? static_cast<void> (0) : __assert_fail ("(Subtarget.hasSSE1() || !NumXMMRegs) && \"SSE registers cannot be used when SSE is disabled\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3578, __PRETTY_FUNCTION__))
;
3579
3580 RegsToPass.push_back(std::make_pair(unsigned(X86::AL),
3581 DAG.getConstant(NumXMMRegs, dl,
3582 MVT::i8)));
3583 }
3584
3585 if (isVarArg && IsMustTail) {
3586 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
3587 for (const auto &F : Forwards) {
3588 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
3589 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3590 }
3591 }
3592
3593 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
3594 // don't need this because the eligibility check rejects calls that require
3595 // shuffling arguments passed in memory.
3596 if (!IsSibcall && isTailCall) {
3597 // Force all the incoming stack arguments to be loaded from the stack
3598 // before any new outgoing arguments are stored to the stack, because the
3599 // outgoing stack slots may alias the incoming argument stack slots, and
3600 // the alias isn't otherwise explicit. This is slightly more conservative
3601 // than necessary, because it means that each store effectively depends
3602 // on every argument instead of just those arguments it would clobber.
3603 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
3604
3605 SmallVector<SDValue, 8> MemOpChains2;
3606 SDValue FIN;
3607 int FI = 0;
3608 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
3609 ++I, ++OutsIndex) {
3610 CCValAssign &VA = ArgLocs[I];
3611
3612 if (VA.isRegLoc()) {
3613 if (VA.needsCustom()) {
3614 assert((CallConv == CallingConv::X86_RegCall) &&(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3615, __PRETTY_FUNCTION__))
3615 "Expecting custom case only in regcall calling convention")(((CallConv == CallingConv::X86_RegCall) && "Expecting custom case only in regcall calling convention"
) ? static_cast<void> (0) : __assert_fail ("(CallConv == CallingConv::X86_RegCall) && \"Expecting custom case only in regcall calling convention\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3615, __PRETTY_FUNCTION__))
;
3616 // This means that we are in special case where one argument was
3617 // passed through two register locations - Skip the next location
3618 ++I;
3619 }
3620
3621 continue;
3622 }
3623
3624 assert(VA.isMemLoc())((VA.isMemLoc()) ? static_cast<void> (0) : __assert_fail
("VA.isMemLoc()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3624, __PRETTY_FUNCTION__))
;
3625 SDValue Arg = OutVals[OutsIndex];
3626 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
3627 // Skip inalloca arguments. They don't require any work.
3628 if (Flags.isInAlloca())
3629 continue;
3630 // Create frame index.
3631 int32_t Offset = VA.getLocMemOffset()+FPDiff;
3632 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
3633 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3634 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3635
3636 if (Flags.isByVal()) {
3637 // Copy relative to framepointer.
3638 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
3639 if (!StackPtr.getNode())
3640 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
3641 getPointerTy(DAG.getDataLayout()));
3642 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3643 StackPtr, Source);
3644
3645 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
3646 ArgChain,
3647 Flags, DAG, dl));
3648 } else {
3649 // Store relative to framepointer.
3650 MemOpChains2.push_back(DAG.getStore(
3651 ArgChain, dl, Arg, FIN,
3652 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
3653 }
3654 }
3655
3656 if (!MemOpChains2.empty())
3657 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
3658
3659 // Store the return address to the appropriate stack slot.
3660 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
3661 getPointerTy(DAG.getDataLayout()),
3662 RegInfo->getSlotSize(), FPDiff, dl);
3663 }
3664
3665 // Build a sequence of copy-to-reg nodes chained together with token chain
3666 // and flag operands which copy the outgoing args into registers.
3667 SDValue InFlag;
3668 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
3669 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
3670 RegsToPass[i].second, InFlag);
3671 InFlag = Chain.getValue(1);
3672 }
3673
3674 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
3675 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((Is64Bit && "Large code model is only legal in 64-bit mode."
) ? static_cast<void> (0) : __assert_fail ("Is64Bit && \"Large code model is only legal in 64-bit mode.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3675, __PRETTY_FUNCTION__))
;
3676 // In the 64-bit large code model, we have to make all calls
3677 // through a register, since the call instruction's 32-bit
3678 // pc-relative offset may not be large enough to hold the whole
3679 // address.
3680 } else if (Callee->getOpcode() == ISD::GlobalAddress) {
3681 // If the callee is a GlobalAddress node (quite common, every direct call
3682 // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
3683 // it.
3684 GlobalAddressSDNode* G = cast<GlobalAddressSDNode>(Callee);
3685
3686 // We should use extra load for direct calls to dllimported functions in
3687 // non-JIT mode.
3688 const GlobalValue *GV = G->getGlobal();
3689 if (!GV->hasDLLImportStorageClass()) {
3690 unsigned char OpFlags = Subtarget.classifyGlobalFunctionReference(GV);
3691
3692 Callee = DAG.getTargetGlobalAddress(
3693 GV, dl, getPointerTy(DAG.getDataLayout()), G->getOffset(), OpFlags);
3694
3695 if (OpFlags == X86II::MO_GOTPCREL) {
3696 // Add a wrapper.
3697 Callee = DAG.getNode(X86ISD::WrapperRIP, dl,
3698 getPointerTy(DAG.getDataLayout()), Callee);
3699 // Add extra indirection
3700 Callee = DAG.getLoad(
3701 getPointerTy(DAG.getDataLayout()), dl, DAG.getEntryNode(), Callee,
3702 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3703 }
3704 }
3705 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3706 const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
3707 unsigned char OpFlags =
3708 Subtarget.classifyGlobalFunctionReference(nullptr, *Mod);
3709
3710 Callee = DAG.getTargetExternalSymbol(
3711 S->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlags);
3712 } else if (Subtarget.isTarget64BitILP32() &&
3713 Callee->getValueType(0) == MVT::i32) {
3714 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
3715 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
3716 }
3717
3718 // Returns a chain & a flag for retval copy to use.
3719 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3720 SmallVector<SDValue, 8> Ops;
3721
3722 if (!IsSibcall && isTailCall) {
3723 Chain = DAG.getCALLSEQ_END(Chain,
3724 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3725 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
3726 InFlag = Chain.getValue(1);
3727 }
3728
3729 Ops.push_back(Chain);
3730 Ops.push_back(Callee);
3731
3732 if (isTailCall)
3733 Ops.push_back(DAG.getConstant(FPDiff, dl, MVT::i32));
3734
3735 // Add argument registers to the end of the list so that they are known live
3736 // into the call.
3737 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
3738 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
3739 RegsToPass[i].second.getValueType()));
3740
3741 // Add a register mask operand representing the call-preserved registers.
3742 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists) then we
3743 // set X86_INTR calling convention because it has the same CSR mask
3744 // (same preserved registers).
3745 const uint32_t *Mask = RegInfo->getCallPreservedMask(
3746 MF, HasNCSR ? (CallingConv::ID)CallingConv::X86_INTR : CallConv);
3747 assert(Mask && "Missing call preserved mask for calling convention")((Mask && "Missing call preserved mask for calling convention"
) ? static_cast<void> (0) : __assert_fail ("Mask && \"Missing call preserved mask for calling convention\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3747, __PRETTY_FUNCTION__))
;
3748
3749 // If this is an invoke in a 32-bit function using a funclet-based
3750 // personality, assume the function clobbers all registers. If an exception
3751 // is thrown, the runtime will not restore CSRs.
3752 // FIXME: Model this more precisely so that we can register allocate across
3753 // the normal edge and spill and fill across the exceptional edge.
3754 if (!Is64Bit && CLI.CS && CLI.CS->isInvoke()) {
3755 const Function *CallerFn = MF.getFunction();
3756 EHPersonality Pers =
3757 CallerFn->hasPersonalityFn()
3758 ? classifyEHPersonality(CallerFn->getPersonalityFn())
3759 : EHPersonality::Unknown;
3760 if (isFuncletEHPersonality(Pers))
3761 Mask = RegInfo->getNoPreservedMask();
3762 }
3763
3764 // Define a new register mask from the existing mask.
3765 uint32_t *RegMask = nullptr;
3766
3767 // In some calling conventions we need to remove the used physical registers
3768 // from the reg mask.
3769 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
3770 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3771
3772 // Allocate a new Reg Mask and copy Mask.
3773 RegMask = MF.allocateRegisterMask(TRI->getNumRegs());
3774 unsigned RegMaskSize = (TRI->getNumRegs() + 31) / 32;
3775 memcpy(RegMask, Mask, sizeof(uint32_t) * RegMaskSize);
3776
3777 // Make sure all sub registers of the argument registers are reset
3778 // in the RegMask.
3779 for (auto const &RegPair : RegsToPass)
3780 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
3781 SubRegs.isValid(); ++SubRegs)
3782 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3783
3784 // Create the RegMask Operand according to our updated mask.
3785 Ops.push_back(DAG.getRegisterMask(RegMask));
3786 } else {
3787 // Create the RegMask Operand according to the static mask.
3788 Ops.push_back(DAG.getRegisterMask(Mask));
3789 }
3790
3791 if (InFlag.getNode())
3792 Ops.push_back(InFlag);
3793
3794 if (isTailCall) {
3795 // We used to do:
3796 //// If this is the first return lowered for this function, add the regs
3797 //// to the liveout set for the function.
3798 // This isn't right, although it's probably harmless on x86; liveouts
3799 // should be computed from returns not tail calls. Consider a void
3800 // function making a tail call to a function returning int.
3801 MF.getFrameInfo().setHasTailCall();
3802 return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
3803 }
3804
3805 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
3806 InFlag = Chain.getValue(1);
3807
3808 // Create the CALLSEQ_END node.
3809 unsigned NumBytesForCalleeToPop;
3810 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
3811 DAG.getTarget().Options.GuaranteedTailCallOpt))
3812 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
3813 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3814 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3815 SR == StackStructReturn)
3816 // If this is a call to a struct-return function, the callee
3817 // pops the hidden struct pointer, so we have to push it back.
3818 // This is common for Darwin/X86, Linux & Mingw32 targets.
3819 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
3820 NumBytesForCalleeToPop = 4;
3821 else
3822 NumBytesForCalleeToPop = 0; // Callee pops nothing.
3823
3824 if (CLI.DoesNotReturn && !getTargetMachine().Options.TrapUnreachable) {
3825 // No need to reset the stack after the call if the call doesn't return. To
3826 // make the MI verify, we'll pretend the callee does it for us.
3827 NumBytesForCalleeToPop = NumBytes;
3828 }
3829
3830 // Returns a flag for retval copy to use.
3831 if (!IsSibcall) {
3832 Chain = DAG.getCALLSEQ_END(Chain,
3833 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
3834 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
3835 true),
3836 InFlag, dl);
3837 InFlag = Chain.getValue(1);
3838 }
3839
3840 // Handle result values, copying them out of physregs into vregs that we
3841 // return.
3842 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
3843 InVals, RegMask);
3844}
3845
3846//===----------------------------------------------------------------------===//
3847// Fast Calling Convention (tail call) implementation
3848//===----------------------------------------------------------------------===//
3849
// Like the stdcall convention, the callee cleans up the arguments, except that
// ECX is reserved for storing the address of the tail-called function. Only 2
// registers are free for argument passing (inreg). Tail call optimization is
// performed provided:
3854// * tailcallopt is enabled
3855// * caller/callee are fastcc
3856// On X86_64 architecture with GOT-style position independent code only local
3857// (within module) calls are supported at the moment.
3858// To keep the stack aligned according to platform abi the function
3859// GetAlignedArgumentStackSize ensures that argument delta is always multiples
3860// of stack alignment. (Dynamic linkers need this - darwin's dyld for example)
3861// If a tail called function callee has more arguments than the caller the
3862// caller needs to make sure that there is room to move the RETADDR to. This is
3863// achieved by reserving an area the size of the argument delta right after the
3864// original RETADDR, but before the saved framepointer or the spilled registers
3865// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
3866// stack layout:
3867// arg1
3868// arg2
3869// RETADDR
3870// [ new RETADDR
3871// move area ]
3872// (possible EBP)
3873// ESI
3874// EDI
3875// local1 ..
3876
3877/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
3878/// requirement.
3879unsigned
3880X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
3881 SelectionDAG& DAG) const {
3882 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3883 const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
3884 unsigned StackAlignment = TFI.getStackAlignment();
3885 uint64_t AlignMask = StackAlignment - 1;
3886 int64_t Offset = StackSize;
3887 unsigned SlotSize = RegInfo->getSlotSize();
3888 if ( (Offset & AlignMask) <= (StackAlignment - SlotSize) ) {
3889 // Number smaller than 12 so just add the difference.
3890 Offset += ((StackAlignment - SlotSize) - (Offset & AlignMask));
3891 } else {
3892 // Mask out lower bits, add stackalignment once plus the 12 bytes.
3893 Offset = ((~AlignMask) & Offset) + StackAlignment +
3894 (StackAlignment-SlotSize);
3895 }
3896 return Offset;
3897}
3898
3899/// Return true if the given stack call argument is already available in the
3900/// same position (relatively) of the caller's incoming argument stack.
3901static
3902bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
3903 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
3904 const X86InstrInfo *TII, const CCValAssign &VA) {
3905 unsigned Bytes = Arg.getValueSizeInBits() / 8;
3906
3907 for (;;) {
3908 // Look through nodes that don't alter the bits of the incoming value.
3909 unsigned Op = Arg.getOpcode();
3910 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
3911 Arg = Arg.getOperand(0);
3912 continue;
3913 }
3914 if (Op == ISD::TRUNCATE) {
3915 const SDValue &TruncInput = Arg.getOperand(0);
3916 if (TruncInput.getOpcode() == ISD::AssertZext &&
3917 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
3918 Arg.getValueType()) {
3919 Arg = TruncInput.getOperand(0);
3920 continue;
3921 }
3922 }
3923 break;
3924 }
3925
3926 int FI = INT_MAX2147483647;
3927 if (Arg.getOpcode() == ISD::CopyFromReg) {
3928 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
3929 if (!TargetRegisterInfo::isVirtualRegister(VR))
3930 return false;
3931 MachineInstr *Def = MRI->getVRegDef(VR);
3932 if (!Def)
3933 return false;
3934 if (!Flags.isByVal()) {
3935 if (!TII->isLoadFromStackSlot(*Def, FI))
3936 return false;
3937 } else {
3938 unsigned Opcode = Def->getOpcode();
3939 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
3940 Opcode == X86::LEA64_32r) &&
3941 Def->getOperand(1).isFI()) {
3942 FI = Def->getOperand(1).getIndex();
3943 Bytes = Flags.getByValSize();
3944 } else
3945 return false;
3946 }
3947 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
3948 if (Flags.isByVal())
3949 // ByVal argument is passed in as a pointer but it's now being
3950 // dereferenced. e.g.
3951 // define @foo(%struct.X* %A) {
3952 // tail call @bar(%struct.X* byval %A)
3953 // }
3954 return false;
3955 SDValue Ptr = Ld->getBasePtr();
3956 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
3957 if (!FINode)
3958 return false;
3959 FI = FINode->getIndex();
3960 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
3961 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
3962 FI = FINode->getIndex();
3963 Bytes = Flags.getByValSize();
3964 } else
3965 return false;
3966
3967 assert(FI != INT_MAX)((FI != 2147483647) ? static_cast<void> (0) : __assert_fail
("FI != INT_MAX", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 3967, __PRETTY_FUNCTION__))
;
3968 if (!MFI.isFixedObjectIndex(FI))
3969 return false;
3970
3971 if (Offset != MFI.getObjectOffset(FI))
3972 return false;
3973
3974 if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
3975 // If the argument location is wider than the argument type, check that any
3976 // extension flags match.
3977 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
3978 Flags.isSExt() != MFI.isObjectSExt(FI)) {
3979 return false;
3980 }
3981 }
3982
3983 return Bytes == MFI.getObjectSize(FI);
3984}
3985
3986/// Check whether the call is eligible for tail call optimization. Targets
3987/// that want to do tail call optimization should implement this function.
3988bool X86TargetLowering::IsEligibleForTailCallOptimization(
3989 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3990 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
3991 const SmallVectorImpl<ISD::OutputArg> &Outs,
3992 const SmallVectorImpl<SDValue> &OutVals,
3993 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3994 if (!mayTailCallThisCC(CalleeCC))
3995 return false;
3996
3997 // If -tailcallopt is specified, make fastcc functions tail-callable.
3998 MachineFunction &MF = DAG.getMachineFunction();
3999 const Function *CallerF = MF.getFunction();
4000
4001 // If the function return type is x86_fp80 and the callee return type is not,
4002 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4003 // perform a tailcall optimization here.
4004 if (CallerF->getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4005 return false;
4006
4007 CallingConv::ID CallerCC = CallerF->getCallingConv();
4008 bool CCMatch = CallerCC == CalleeCC;
4009 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4010 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4011
4012 // Win64 functions have extra shadow space for argument homing. Don't do the
4013 // sibcall if the caller and callee have mismatched expectations for this
4014 // space.
4015 if (IsCalleeWin64 != IsCallerWin64)
4016 return false;
4017
4018 if (DAG.getTarget().Options.GuaranteedTailCallOpt) {
4019 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4020 return true;
4021 return false;
4022 }
4023
4024 // Look for obvious safe cases to perform tail call optimization that do not
4025 // require ABI changes. This is what gcc calls sibcall.
4026
4027 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4028 // emit a special epilogue.
4029 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4030 if (RegInfo->needsStackRealignment(MF))
4031 return false;
4032
4033 // Also avoid sibcall optimization if either caller or callee uses struct
4034 // return semantics.
4035 if (isCalleeStructRet || isCallerStructRet)
4036 return false;
4037
4038 // Do not sibcall optimize vararg calls unless all arguments are passed via
4039 // registers.
4040 LLVMContext &C = *DAG.getContext();
4041 if (isVarArg && !Outs.empty()) {
4042 // Optimizing for varargs on Win64 is unlikely to be safe without
4043 // additional testing.
4044 if (IsCalleeWin64 || IsCallerWin64)
4045 return false;
4046
4047 SmallVector<CCValAssign, 16> ArgLocs;
4048 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4049
4050 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4051 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4052 if (!ArgLocs[i].isRegLoc())
4053 return false;
4054 }
4055
4056 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4057 // stack. Therefore, if it's not used by the call it is not safe to optimize
4058 // this into a sibcall.
4059 bool Unused = false;
4060 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4061 if (!Ins[i].Used) {
4062 Unused = true;
4063 break;
4064 }
4065 }
4066 if (Unused) {
4067 SmallVector<CCValAssign, 16> RVLocs;
4068 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4069 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4070 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4071 CCValAssign &VA = RVLocs[i];
4072 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4073 return false;
4074 }
4075 }
4076
4077 // Check that the call results are passed in the same way.
4078 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4079 RetCC_X86, RetCC_X86))
4080 return false;
4081 // The callee has to preserve all registers the caller needs to preserve.
4082 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4083 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4084 if (!CCMatch) {
4085 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4086 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4087 return false;
4088 }
4089
4090 unsigned StackArgsSize = 0;
4091
4092 // If the callee takes no arguments then go on to check the results of the
4093 // call.
4094 if (!Outs.empty()) {
4095 // Check if stack adjustment is needed. For now, do not do this if any
4096 // argument is passed on the stack.
4097 SmallVector<CCValAssign, 16> ArgLocs;
4098 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4099
4100 // Allocate shadow area for Win64
4101 if (IsCalleeWin64)
4102 CCInfo.AllocateStack(32, 8);
4103
4104 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4105 StackArgsSize = CCInfo.getNextStackOffset();
4106
4107 if (CCInfo.getNextStackOffset()) {
4108 // Check if the arguments are already laid out in the right way as
4109 // the caller's fixed stack objects.
4110 MachineFrameInfo &MFI = MF.getFrameInfo();
4111 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4112 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4113 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4114 CCValAssign &VA = ArgLocs[i];
4115 SDValue Arg = OutVals[i];
4116 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4117 if (VA.getLocInfo() == CCValAssign::Indirect)
4118 return false;
4119 if (!VA.isRegLoc()) {
4120 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4121 MFI, MRI, TII, VA))
4122 return false;
4123 }
4124 }
4125 }
4126
4127 bool PositionIndependent = isPositionIndependent();
4128 // If the tailcall address may be in a register, then make sure it's
4129 // possible to register allocate for it. In 32-bit, the call address can
4130 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4131 // callee-saved registers are restored. These happen to be the same
4132 // registers used to pass 'inreg' arguments so watch out for those.
4133 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4134 !isa<ExternalSymbolSDNode>(Callee)) ||
4135 PositionIndependent)) {
4136 unsigned NumInRegs = 0;
4137 // In PIC we need an extra register to formulate the address computation
4138 // for the callee.
4139 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4140
4141 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4142 CCValAssign &VA = ArgLocs[i];
4143 if (!VA.isRegLoc())
4144 continue;
4145 unsigned Reg = VA.getLocReg();
4146 switch (Reg) {
4147 default: break;
4148 case X86::EAX: case X86::EDX: case X86::ECX:
4149 if (++NumInRegs == MaxInRegs)
4150 return false;
4151 break;
4152 }
4153 }
4154 }
4155
4156 const MachineRegisterInfo &MRI = MF.getRegInfo();
4157 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4158 return false;
4159 }
4160
4161 bool CalleeWillPop =
4162 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4163 MF.getTarget().Options.GuaranteedTailCallOpt);
4164
4165 if (unsigned BytesToPop =
4166 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4167 // If we have bytes to pop, the callee must pop them.
4168 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4169 if (!CalleePopMatches)
4170 return false;
4171 } else if (CalleeWillPop && StackArgsSize > 0) {
4172 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4173 return false;
4174 }
4175
4176 return true;
4177}
4178
4179FastISel *
4180X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4181 const TargetLibraryInfo *libInfo) const {
4182 return X86::createFastISel(funcInfo, libInfo);
4183}
4184
4185//===----------------------------------------------------------------------===//
4186// Other Lowering Hooks
4187//===----------------------------------------------------------------------===//
4188
4189static bool MayFoldLoad(SDValue Op) {
4190 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4191}
4192
4193static bool MayFoldIntoStore(SDValue Op) {
4194 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4195}
4196
4197static bool MayFoldIntoZeroExtend(SDValue Op) {
4198 if (Op.hasOneUse()) {
4199 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4200 return (ISD::ZERO_EXTEND == Opcode);
4201 }
4202 return false;
4203}
4204
4205static bool isTargetShuffle(unsigned Opcode) {
4206 switch(Opcode) {
4207 default: return false;
4208 case X86ISD::BLENDI:
4209 case X86ISD::PSHUFB:
4210 case X86ISD::PSHUFD:
4211 case X86ISD::PSHUFHW:
4212 case X86ISD::PSHUFLW:
4213 case X86ISD::SHUFP:
4214 case X86ISD::INSERTPS:
4215 case X86ISD::PALIGNR:
4216 case X86ISD::VSHLDQ:
4217 case X86ISD::VSRLDQ:
4218 case X86ISD::MOVLHPS:
4219 case X86ISD::MOVLHPD:
4220 case X86ISD::MOVHLPS:
4221 case X86ISD::MOVLPS:
4222 case X86ISD::MOVLPD:
4223 case X86ISD::MOVSHDUP:
4224 case X86ISD::MOVSLDUP:
4225 case X86ISD::MOVDDUP:
4226 case X86ISD::MOVSS:
4227 case X86ISD::MOVSD:
4228 case X86ISD::UNPCKL:
4229 case X86ISD::UNPCKH:
4230 case X86ISD::VBROADCAST:
4231 case X86ISD::VPERMILPI:
4232 case X86ISD::VPERMILPV:
4233 case X86ISD::VPERM2X128:
4234 case X86ISD::VPERMIL2:
4235 case X86ISD::VPERMI:
4236 case X86ISD::VPPERM:
4237 case X86ISD::VPERMV:
4238 case X86ISD::VPERMV3:
4239 case X86ISD::VPERMIV3:
4240 case X86ISD::VZEXT_MOVL:
4241 return true;
4242 }
4243}
4244
4245static bool isTargetShuffleVariableMask(unsigned Opcode) {
4246 switch (Opcode) {
4247 default: return false;
4248 // Target Shuffles.
4249 case X86ISD::PSHUFB:
4250 case X86ISD::VPERMILPV:
4251 case X86ISD::VPERMIL2:
4252 case X86ISD::VPPERM:
4253 case X86ISD::VPERMV:
4254 case X86ISD::VPERMV3:
4255 case X86ISD::VPERMIV3:
4256 return true;
4257 // 'Faux' Target Shuffles.
4258 case ISD::AND:
4259 case X86ISD::ANDNP:
4260 return true;
4261 }
4262}
4263
4264SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4265 MachineFunction &MF = DAG.getMachineFunction();
4266 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4267 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4268 int ReturnAddrIndex = FuncInfo->getRAIndex();
4269
4270 if (ReturnAddrIndex == 0) {
4271 // Set up a frame object for the return address.
4272 unsigned SlotSize = RegInfo->getSlotSize();
4273 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4274 -(int64_t)SlotSize,
4275 false);
4276 FuncInfo->setRAIndex(ReturnAddrIndex);
4277 }
4278
4279 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4280}
4281
4282bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4283 bool hasSymbolicDisplacement) {
4284 // Offset should fit into 32 bit immediate field.
4285 if (!isInt<32>(Offset))
4286 return false;
4287
4288 // If we don't have a symbolic displacement - we don't have any extra
4289 // restrictions.
4290 if (!hasSymbolicDisplacement)
4291 return true;
4292
4293 // FIXME: Some tweaks might be needed for medium code model.
4294 if (M != CodeModel::Small && M != CodeModel::Kernel)
4295 return false;
4296
4297 // For small code model we assume that latest object is 16MB before end of 31
4298 // bits boundary. We may also accept pretty large negative constants knowing
4299 // that all objects are in the positive half of address space.
4300 if (M == CodeModel::Small && Offset < 16*1024*1024)
4301 return true;
4302
4303 // For kernel code model we know that all object resist in the negative half
4304 // of 32bits address space. We may not accept negative offsets, since they may
4305 // be just off and we may accept pretty large positive ones.
4306 if (M == CodeModel::Kernel && Offset >= 0)
4307 return true;
4308
4309 return false;
4310}
4311
4312/// Determines whether the callee is required to pop its own arguments.
4313/// Callee pop is necessary to support tail calls.
4314bool X86::isCalleePop(CallingConv::ID CallingConv,
4315 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4316 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4317 // can guarantee TCO.
4318 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4319 return true;
4320
4321 switch (CallingConv) {
4322 default:
4323 return false;
4324 case CallingConv::X86_StdCall:
4325 case CallingConv::X86_FastCall:
4326 case CallingConv::X86_ThisCall:
4327 case CallingConv::X86_VectorCall:
4328 return !is64Bit;
4329 }
4330}
4331
4332/// \brief Return true if the condition is an unsigned comparison operation.
4333static bool isX86CCUnsigned(unsigned X86CC) {
4334 switch (X86CC) {
4335 default:
4336 llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4336)
;
4337 case X86::COND_E:
4338 case X86::COND_NE:
4339 case X86::COND_B:
4340 case X86::COND_A:
4341 case X86::COND_BE:
4342 case X86::COND_AE:
4343 return true;
4344 case X86::COND_G:
4345 case X86::COND_GE:
4346 case X86::COND_L:
4347 case X86::COND_LE:
4348 return false;
4349 }
4350}
4351
4352static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
4353 switch (SetCCOpcode) {
4354 default: llvm_unreachable("Invalid integer condition!")::llvm::llvm_unreachable_internal("Invalid integer condition!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4354)
;
4355 case ISD::SETEQ: return X86::COND_E;
4356 case ISD::SETGT: return X86::COND_G;
4357 case ISD::SETGE: return X86::COND_GE;
4358 case ISD::SETLT: return X86::COND_L;
4359 case ISD::SETLE: return X86::COND_LE;
4360 case ISD::SETNE: return X86::COND_NE;
4361 case ISD::SETULT: return X86::COND_B;
4362 case ISD::SETUGT: return X86::COND_A;
4363 case ISD::SETULE: return X86::COND_BE;
4364 case ISD::SETUGE: return X86::COND_AE;
4365 }
4366}
4367
4368/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
4369/// condition code, returning the condition code and the LHS/RHS of the
4370/// comparison to make.
4371static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
4372 bool isFP, SDValue &LHS, SDValue &RHS,
4373 SelectionDAG &DAG) {
4374 if (!isFP) {
4375 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4376 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
4377 // X > -1 -> X == 0, jump !sign.
4378 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4379 return X86::COND_NS;
4380 }
4381 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
4382 // X < 0 -> X == 0, jump on sign.
4383 return X86::COND_S;
4384 }
4385 if (SetCCOpcode == ISD::SETLT && RHSC->getZExtValue() == 1) {
4386 // X < 1 -> X <= 0
4387 RHS = DAG.getConstant(0, DL, RHS.getValueType());
4388 return X86::COND_LE;
4389 }
4390 }
4391
4392 return TranslateIntegerX86CC(SetCCOpcode);
4393 }
4394
4395 // First determine if it is required or is profitable to flip the operands.
4396
4397 // If LHS is a foldable load, but RHS is not, flip the condition.
4398 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
4399 !ISD::isNON_EXTLoad(RHS.getNode())) {
4400 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
4401 std::swap(LHS, RHS);
4402 }
4403
4404 switch (SetCCOpcode) {
4405 default: break;
4406 case ISD::SETOLT:
4407 case ISD::SETOLE:
4408 case ISD::SETUGT:
4409 case ISD::SETUGE:
4410 std::swap(LHS, RHS);
4411 break;
4412 }
4413
4414 // On a floating point condition, the flags are set as follows:
4415 // ZF PF CF op
4416 // 0 | 0 | 0 | X > Y
4417 // 0 | 0 | 1 | X < Y
4418 // 1 | 0 | 0 | X == Y
4419 // 1 | 1 | 1 | unordered
4420 switch (SetCCOpcode) {
4421 default: llvm_unreachable("Condcode should be pre-legalized away")::llvm::llvm_unreachable_internal("Condcode should be pre-legalized away"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4421)
;
4422 case ISD::SETUEQ:
4423 case ISD::SETEQ: return X86::COND_E;
4424 case ISD::SETOLT: // flipped
4425 case ISD::SETOGT:
4426 case ISD::SETGT: return X86::COND_A;
4427 case ISD::SETOLE: // flipped
4428 case ISD::SETOGE:
4429 case ISD::SETGE: return X86::COND_AE;
4430 case ISD::SETUGT: // flipped
4431 case ISD::SETULT:
4432 case ISD::SETLT: return X86::COND_B;
4433 case ISD::SETUGE: // flipped
4434 case ISD::SETULE:
4435 case ISD::SETLE: return X86::COND_BE;
4436 case ISD::SETONE:
4437 case ISD::SETNE: return X86::COND_NE;
4438 case ISD::SETUO: return X86::COND_P;
4439 case ISD::SETO: return X86::COND_NP;
4440 case ISD::SETOEQ:
4441 case ISD::SETUNE: return X86::COND_INVALID;
4442 }
4443}
4444
4445/// Is there a floating point cmov for the specific X86 condition code?
4446/// Current x86 isa includes the following FP cmov instructions:
4447/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
4448static bool hasFPCMov(unsigned X86CC) {
4449 switch (X86CC) {
4450 default:
4451 return false;
4452 case X86::COND_B:
4453 case X86::COND_BE:
4454 case X86::COND_E:
4455 case X86::COND_P:
4456 case X86::COND_A:
4457 case X86::COND_AE:
4458 case X86::COND_NE:
4459 case X86::COND_NP:
4460 return true;
4461 }
4462}
4463
4464
4465bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
4466 const CallInst &I,
4467 unsigned Intrinsic) const {
4468
4469 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
4470 if (!IntrData)
4471 return false;
4472
4473 Info.opc = ISD::INTRINSIC_W_CHAIN;
4474 Info.readMem = false;
4475 Info.writeMem = false;
4476 Info.vol = false;
4477 Info.offset = 0;
4478
4479 switch (IntrData->Type) {
4480 case EXPAND_FROM_MEM: {
4481 Info.ptrVal = I.getArgOperand(0);
4482 Info.memVT = MVT::getVT(I.getType());
4483 Info.align = 1;
4484 Info.readMem = true;
4485 break;
4486 }
4487 case COMPRESS_TO_MEM: {
4488 Info.ptrVal = I.getArgOperand(0);
4489 Info.memVT = MVT::getVT(I.getArgOperand(1)->getType());
4490 Info.align = 1;
4491 Info.writeMem = true;
4492 break;
4493 }
4494 case TRUNCATE_TO_MEM_VI8:
4495 case TRUNCATE_TO_MEM_VI16:
4496 case TRUNCATE_TO_MEM_VI32: {
4497 Info.ptrVal = I.getArgOperand(0);
4498 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
4499 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
4500 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
4501 ScalarVT = MVT::i8;
4502 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
4503 ScalarVT = MVT::i16;
4504 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
4505 ScalarVT = MVT::i32;
4506
4507 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
4508 Info.align = 1;
4509 Info.writeMem = true;
4510 break;
4511 }
4512 default:
4513 return false;
4514 }
4515
4516 return true;
4517}
4518
4519/// Returns true if the target can instruction select the
4520/// specified FP immediate natively. If false, the legalizer will
4521/// materialize the FP immediate as a load from a constant pool.
4522bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4523 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
4524 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
4525 return true;
4526 }
4527 return false;
4528}
4529
4530bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
4531 ISD::LoadExtType ExtTy,
4532 EVT NewVT) const {
4533 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
4534 // relocation target a movq or addq instruction: don't let the load shrink.
4535 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
4536 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
4537 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
4538 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
4539 return true;
4540}
4541
4542/// \brief Returns true if it is beneficial to convert a load of a constant
4543/// to just the constant itself.
4544bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
4545 Type *Ty) const {
4546 assert(Ty->isIntegerTy())((Ty->isIntegerTy()) ? static_cast<void> (0) : __assert_fail
("Ty->isIntegerTy()", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4546, __PRETTY_FUNCTION__))
;
4547
4548 unsigned BitSize = Ty->getPrimitiveSizeInBits();
4549 if (BitSize == 0 || BitSize > 64)
4550 return false;
4551 return true;
4552}
4553
4554bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT,
4555 unsigned Index) const {
4556 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
4557 return false;
4558
4559 return (Index == 0 || Index == ResVT.getVectorNumElements());
4560}
4561
4562bool X86TargetLowering::isCheapToSpeculateCttz() const {
4563 // Speculate cttz only if we can directly use TZCNT.
4564 return Subtarget.hasBMI();
4565}
4566
4567bool X86TargetLowering::isCheapToSpeculateCtlz() const {
4568 // Speculate ctlz only if we can directly use LZCNT.
4569 return Subtarget.hasLZCNT();
4570}
4571
4572bool X86TargetLowering::isCtlzFast() const {
4573 return Subtarget.hasFastLZCNT();
4574}
4575
4576bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
4577 const Instruction &AndI) const {
4578 return true;
4579}
4580
4581bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
4582 if (!Subtarget.hasBMI())
4583 return false;
4584
4585 // There are only 32-bit and 64-bit forms for 'andn'.
4586 EVT VT = Y.getValueType();
4587 if (VT != MVT::i32 && VT != MVT::i64)
4588 return false;
4589
4590 return true;
4591}
4592
4593MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
4594 MVT VT = MVT::getIntegerVT(NumBits);
4595 if (isTypeLegal(VT))
4596 return VT;
4597
4598 // PMOVMSKB can handle this.
4599 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
4600 return MVT::v16i8;
4601
4602 // VPMOVMSKB can handle this.
4603 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
4604 return MVT::v32i8;
4605
4606 // TODO: Allow 64-bit type for 32-bit target.
4607 // TODO: 512-bit types should be allowed, but make sure that those
4608 // cases are handled in combineVectorSizedSetCCEquality().
4609
4610 return MVT::INVALID_SIMPLE_VALUE_TYPE;
4611}
4612
4613/// Val is the undef sentinel value or equal to the specified value.
4614static bool isUndefOrEqual(int Val, int CmpVal) {
4615 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
4616}
4617
4618/// Val is either the undef or zero sentinel value.
4619static bool isUndefOrZero(int Val) {
4620 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
4621}
4622
4623/// Return true if every element in Mask, beginning
4624/// from position Pos and ending in Pos+Size is the undef sentinel value.
4625static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
4626 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4627 if (Mask[i] != SM_SentinelUndef)
4628 return false;
4629 return true;
4630}
4631
4632/// Return true if Val is undef or if its value falls within the
4633/// specified range (L, H].
4634static bool isUndefOrInRange(int Val, int Low, int Hi) {
4635 return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
4636}
4637
4638/// Return true if every element in Mask is undef or if its value
4639/// falls within the specified range (L, H].
4640static bool isUndefOrInRange(ArrayRef<int> Mask,
4641 int Low, int Hi) {
4642 for (int M : Mask)
4643 if (!isUndefOrInRange(M, Low, Hi))
4644 return false;
4645 return true;
4646}
4647
4648/// Return true if Val is undef, zero or if its value falls within the
4649/// specified range (L, H].
4650static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
4651 return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
4652}
4653
4654/// Return true if every element in Mask is undef, zero or if its value
4655/// falls within the specified range (L, H].
4656static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
4657 for (int M : Mask)
4658 if (!isUndefOrZeroOrInRange(M, Low, Hi))
4659 return false;
4660 return true;
4661}
4662
4663/// Return true if every element in Mask, beginning
4664/// from position Pos and ending in Pos+Size, falls within the specified
4665/// sequential range (Low, Low+Size]. or is undef.
4666static bool isSequentialOrUndefInRange(ArrayRef<int> Mask,
4667 unsigned Pos, unsigned Size, int Low) {
4668 for (unsigned i = Pos, e = Pos+Size; i != e; ++i, ++Low)
4669 if (!isUndefOrEqual(Mask[i], Low))
4670 return false;
4671 return true;
4672}
4673
4674/// Return true if every element in Mask, beginning
4675/// from position Pos and ending in Pos+Size, falls within the specified
4676/// sequential range (Low, Low+Size], or is undef or is zero.
4677static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4678 unsigned Size, int Low) {
4679 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, ++Low)
4680 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
4681 return false;
4682 return true;
4683}
4684
4685/// Return true if every element in Mask, beginning
4686/// from position Pos and ending in Pos+Size is undef or is zero.
4687static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
4688 unsigned Size) {
4689 for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
4690 if (!isUndefOrZero(Mask[i]))
4691 return false;
4692 return true;
4693}
4694
4695/// \brief Helper function to test whether a shuffle mask could be
4696/// simplified by widening the elements being shuffled.
4697///
4698/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
4699/// leaves it in an unspecified state.
4700///
4701/// NOTE: This must handle normal vector shuffle masks and *target* vector
4702/// shuffle masks. The latter have the special property of a '-2' representing
4703/// a zero-ed lane of a vector.
4704static bool canWidenShuffleElements(ArrayRef<int> Mask,
4705 SmallVectorImpl<int> &WidenedMask) {
4706 WidenedMask.assign(Mask.size() / 2, 0);
4707 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
4708 int M0 = Mask[i];
4709 int M1 = Mask[i + 1];
4710
4711 // If both elements are undef, its trivial.
4712 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
4713 WidenedMask[i / 2] = SM_SentinelUndef;
4714 continue;
4715 }
4716
4717 // Check for an undef mask and a mask value properly aligned to fit with
4718 // a pair of values. If we find such a case, use the non-undef mask's value.
4719 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
4720 WidenedMask[i / 2] = M1 / 2;
4721 continue;
4722 }
4723 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
4724 WidenedMask[i / 2] = M0 / 2;
4725 continue;
4726 }
4727
4728 // When zeroing, we need to spread the zeroing across both lanes to widen.
4729 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
4730 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
4731 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
4732 WidenedMask[i / 2] = SM_SentinelZero;
4733 continue;
4734 }
4735 return false;
4736 }
4737
4738 // Finally check if the two mask values are adjacent and aligned with
4739 // a pair.
4740 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
4741 WidenedMask[i / 2] = M0 / 2;
4742 continue;
4743 }
4744
4745 // Otherwise we can't safely widen the elements used in this shuffle.
4746 return false;
4747 }
4748 assert(WidenedMask.size() == Mask.size() / 2 &&((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4749, __PRETTY_FUNCTION__))
4749 "Incorrect size of mask after widening the elements!")((WidenedMask.size() == Mask.size() / 2 && "Incorrect size of mask after widening the elements!"
) ? static_cast<void> (0) : __assert_fail ("WidenedMask.size() == Mask.size() / 2 && \"Incorrect size of mask after widening the elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4749, __PRETTY_FUNCTION__))
;
4750
4751 return true;
4752}
4753
4754/// Helper function to scale a shuffle or target shuffle mask, replacing each
4755/// mask index with the scaled sequential indices for an equivalent narrowed
4756/// mask. This is the reverse process to canWidenShuffleElements, but can always
4757/// succeed.
4758static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
4759 SmallVectorImpl<int> &ScaledMask) {
4760 assert(0 < Scale && "Unexpected scaling factor")((0 < Scale && "Unexpected scaling factor") ? static_cast
<void> (0) : __assert_fail ("0 < Scale && \"Unexpected scaling factor\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4760, __PRETTY_FUNCTION__))
;
4761 int NumElts = Mask.size();
4762 ScaledMask.assign(NumElts * Scale, -1);
4763
4764 for (int i = 0; i != NumElts; ++i) {
4765 int M = Mask[i];
4766
4767 // Repeat sentinel values in every mask element.
4768 if (M < 0) {
4769 for (int s = 0; s != Scale; ++s)
4770 ScaledMask[(Scale * i) + s] = M;
4771 continue;
4772 }
4773
4774 // Scale mask element and increment across each mask element.
4775 for (int s = 0; s != Scale; ++s)
4776 ScaledMask[(Scale * i) + s] = (Scale * M) + s;
4777 }
4778}
4779
4780/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
4781/// extract that is suitable for instruction that extract 128 or 256 bit vectors
4782static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
4783 assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width")(((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unexpected vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4783, __PRETTY_FUNCTION__))
;
4784 if (!isa<ConstantSDNode>(N->getOperand(1).getNode()))
4785 return false;
4786
4787 // The index should be aligned on a vecWidth-bit boundary.
4788 uint64_t Index = N->getConstantOperandVal(1);
4789 MVT VT = N->getSimpleValueType(0);
4790 unsigned ElSize = VT.getScalarSizeInBits();
4791 return (Index * ElSize) % vecWidth == 0;
4792}
4793
4794/// Return true if the specified INSERT_SUBVECTOR
4795/// operand specifies a subvector insert that is suitable for input to
4796/// insertion of 128 or 256-bit subvectors
4797static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
4798 assert((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width")(((vecWidth == 128 || vecWidth == 256) && "Unexpected vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unexpected vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4798, __PRETTY_FUNCTION__))
;
4799 if (!isa<ConstantSDNode>(N->getOperand(2).getNode()))
4800 return false;
4801
4802 // The index should be aligned on a vecWidth-bit boundary.
4803 uint64_t Index = N->getConstantOperandVal(2);
4804 MVT VT = N->getSimpleValueType(0);
4805 unsigned ElSize = VT.getScalarSizeInBits();
4806 return (Index * ElSize) % vecWidth == 0;
4807}
4808
4809bool X86::isVINSERT128Index(SDNode *N) {
4810 return isVINSERTIndex(N, 128);
4811}
4812
4813bool X86::isVINSERT256Index(SDNode *N) {
4814 return isVINSERTIndex(N, 256);
4815}
4816
4817bool X86::isVEXTRACT128Index(SDNode *N) {
4818 return isVEXTRACTIndex(N, 128);
4819}
4820
4821bool X86::isVEXTRACT256Index(SDNode *N) {
4822 return isVEXTRACTIndex(N, 256);
4823}
4824
4825static unsigned getExtractVEXTRACTImmediate(SDNode *N, unsigned vecWidth) {
4826 assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width")(((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4826, __PRETTY_FUNCTION__))
;
4827 assert(isa<ConstantSDNode>(N->getOperand(1).getNode()) &&((isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
"Illegal extract subvector for VEXTRACT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(1).getNode()) && \"Illegal extract subvector for VEXTRACT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4828, __PRETTY_FUNCTION__))
4828 "Illegal extract subvector for VEXTRACT")((isa<ConstantSDNode>(N->getOperand(1).getNode()) &&
"Illegal extract subvector for VEXTRACT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(1).getNode()) && \"Illegal extract subvector for VEXTRACT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4828, __PRETTY_FUNCTION__))
;
4829
4830 uint64_t Index = N->getConstantOperandVal(1);
4831 MVT VecVT = N->getOperand(0).getSimpleValueType();
4832 unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
4833 return Index / NumElemsPerChunk;
4834}
4835
4836static unsigned getInsertVINSERTImmediate(SDNode *N, unsigned vecWidth) {
4837 assert((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width")(((vecWidth == 128 || vecWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vecWidth == 128 || vecWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4837, __PRETTY_FUNCTION__))
;
4838 assert(isa<ConstantSDNode>(N->getOperand(2).getNode()) &&((isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
"Illegal insert subvector for VINSERT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2).getNode()) && \"Illegal insert subvector for VINSERT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4839, __PRETTY_FUNCTION__))
4839 "Illegal insert subvector for VINSERT")((isa<ConstantSDNode>(N->getOperand(2).getNode()) &&
"Illegal insert subvector for VINSERT") ? static_cast<void
> (0) : __assert_fail ("isa<ConstantSDNode>(N->getOperand(2).getNode()) && \"Illegal insert subvector for VINSERT\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4839, __PRETTY_FUNCTION__))
;
4840
4841 uint64_t Index = N->getConstantOperandVal(2);
4842 MVT VecVT = N->getSimpleValueType(0);
4843 unsigned NumElemsPerChunk = vecWidth / VecVT.getScalarSizeInBits();
4844 return Index / NumElemsPerChunk;
4845}
4846
4847/// Return the appropriate immediate to extract the specified
4848/// EXTRACT_SUBVECTOR index with VEXTRACTF128 and VINSERTI128 instructions.
4849unsigned X86::getExtractVEXTRACT128Immediate(SDNode *N) {
4850 return getExtractVEXTRACTImmediate(N, 128);
4851}
4852
4853/// Return the appropriate immediate to extract the specified
4854/// EXTRACT_SUBVECTOR index with VEXTRACTF64x4 and VINSERTI64x4 instructions.
4855unsigned X86::getExtractVEXTRACT256Immediate(SDNode *N) {
4856 return getExtractVEXTRACTImmediate(N, 256);
4857}
4858
4859/// Return the appropriate immediate to insert at the specified
4860/// INSERT_SUBVECTOR index with VINSERTF128 and VINSERTI128 instructions.
4861unsigned X86::getInsertVINSERT128Immediate(SDNode *N) {
4862 return getInsertVINSERTImmediate(N, 128);
4863}
4864
4865/// Return the appropriate immediate to insert at the specified
4866/// INSERT_SUBVECTOR index with VINSERTF46x4 and VINSERTI64x4 instructions.
4867unsigned X86::getInsertVINSERT256Immediate(SDNode *N) {
4868 return getInsertVINSERTImmediate(N, 256);
4869}
4870
4871/// Returns true if Elt is a constant zero or a floating point constant +0.0.
4872bool X86::isZeroNode(SDValue Elt) {
4873 return isNullConstant(Elt) || isNullFPConstant(Elt);
4874}
4875
4876// Build a vector of constants.
4877// Use an UNDEF node if MaskElt == -1.
4878// Split 64-bit constants in the 32-bit mode.
4879static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
4880 const SDLoc &dl, bool IsMask = false) {
4881
4882 SmallVector<SDValue, 32> Ops;
4883 bool Split = false;
4884
4885 MVT ConstVecVT = VT;
4886 unsigned NumElts = VT.getVectorNumElements();
4887 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4888 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4889 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4890 Split = true;
4891 }
4892
4893 MVT EltVT = ConstVecVT.getVectorElementType();
4894 for (unsigned i = 0; i < NumElts; ++i) {
4895 bool IsUndef = Values[i] < 0 && IsMask;
4896 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
4897 DAG.getConstant(Values[i], dl, EltVT);
4898 Ops.push_back(OpNode);
4899 if (Split)
4900 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
4901 DAG.getConstant(0, dl, EltVT));
4902 }
4903 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4904 if (Split)
4905 ConstsNode = DAG.getBitcast(VT, ConstsNode);
4906 return ConstsNode;
4907}
4908
4909static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
4910 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4911 assert(Bits.size() == Undefs.getBitWidth() &&((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4912, __PRETTY_FUNCTION__))
4912 "Unequal constant and undef arrays")((Bits.size() == Undefs.getBitWidth() && "Unequal constant and undef arrays"
) ? static_cast<void> (0) : __assert_fail ("Bits.size() == Undefs.getBitWidth() && \"Unequal constant and undef arrays\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4912, __PRETTY_FUNCTION__))
;
4913 SmallVector<SDValue, 32> Ops;
4914 bool Split = false;
4915
4916 MVT ConstVecVT = VT;
4917 unsigned NumElts = VT.getVectorNumElements();
4918 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
4919 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
4920 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
4921 Split = true;
4922 }
4923
4924 MVT EltVT = ConstVecVT.getVectorElementType();
4925 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
4926 if (Undefs[i]) {
4927 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
4928 continue;
4929 }
4930 const APInt &V = Bits[i];
4931 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"
) ? static_cast<void> (0) : __assert_fail ("V.getBitWidth() == VT.getScalarSizeInBits() && \"Unexpected sizes\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4931, __PRETTY_FUNCTION__))
;
4932 if (Split) {
4933 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
4934 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
4935 } else if (EltVT == MVT::f32) {
4936 APFloat FV(APFloat::IEEEsingle(), V);
4937 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4938 } else if (EltVT == MVT::f64) {
4939 APFloat FV(APFloat::IEEEdouble(), V);
4940 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
4941 } else {
4942 Ops.push_back(DAG.getConstant(V, dl, EltVT));
4943 }
4944 }
4945
4946 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
4947 return DAG.getBitcast(VT, ConstsNode);
4948}
4949
4950/// Returns a vector of specified type with all zero elements.
4951static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
4952 SelectionDAG &DAG, const SDLoc &dl) {
4953 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4955, __PRETTY_FUNCTION__))
4954 VT.getVectorElementType() == MVT::i1) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4955, __PRETTY_FUNCTION__))
4955 "Unexpected vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
() || VT.getVectorElementType() == MVT::i1) && "Unexpected vector type"
) ? static_cast<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || VT.getVectorElementType() == MVT::i1) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4955, __PRETTY_FUNCTION__))
;
4956
4957 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
4958 // type. This ensures they get CSE'd. But if the integer type is not
4959 // available, use a floating-point +0.0 instead.
4960 SDValue Vec;
4961 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
4962 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
4963 } else if (VT.getVectorElementType() == MVT::i1) {
4964 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4965, __PRETTY_FUNCTION__))
4965 "Unexpected vector type")(((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4965, __PRETTY_FUNCTION__))
;
4966 assert((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&(((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4967, __PRETTY_FUNCTION__))
4967 "Unexpected vector type")(((Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) &&
"Unexpected vector type") ? static_cast<void> (0) : __assert_fail
("(Subtarget.hasVLX() || VT.getVectorNumElements() >= 8) && \"Unexpected vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4967, __PRETTY_FUNCTION__))
;
4968 Vec = DAG.getConstant(0, dl, VT);
4969 } else {
4970 unsigned Num32BitElts = VT.getSizeInBits() / 32;
4971 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
4972 }
4973 return DAG.getBitcast(VT, Vec);
4974}
4975
4976static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
4977 const SDLoc &dl, unsigned vectorWidth) {
4978 EVT VT = Vec.getValueType();
4979 EVT ElVT = VT.getVectorElementType();
4980 unsigned Factor = VT.getSizeInBits()/vectorWidth;
4981 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
4982 VT.getVectorNumElements()/Factor);
4983
4984 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
4985 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
4986 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 4986, __PRETTY_FUNCTION__))
;
4987
4988 // This is the index of the first element of the vectorWidth-bit chunk
4989 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
4990 IdxVal &= ~(ElemsPerChunk - 1);
4991
4992 // If the input is a buildvector just emit a smaller one.
4993 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
4994 return DAG.getBuildVector(
4995 ResultVT, dl, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
4996
4997 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
4998 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
4999}
5000
5001/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5002/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5003/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5004/// instructions or a simple subregister reference. Idx is an index in the
5005/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5006/// lowering EXTRACT_VECTOR_ELT operations easier.
5007static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5008 SelectionDAG &DAG, const SDLoc &dl) {
5009 assert((Vec.getValueType().is256BitVector() ||(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5010, __PRETTY_FUNCTION__))
5010 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")(((Vec.getValueType().is256BitVector() || Vec.getValueType().
is512BitVector()) && "Unexpected vector size!") ? static_cast
<void> (0) : __assert_fail ("(Vec.getValueType().is256BitVector() || Vec.getValueType().is512BitVector()) && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5010, __PRETTY_FUNCTION__))
;
5011 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5012}
5013
5014/// Generate a DAG to grab 256-bits from a 512-bit vector.
5015static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5016 SelectionDAG &DAG, const SDLoc &dl) {
5017 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((Vec.getValueType().is512BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is512BitVector() && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5017, __PRETTY_FUNCTION__))
;
5018 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5019}
5020
5021static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5022 SelectionDAG &DAG, const SDLoc &dl,
5023 unsigned vectorWidth) {
5024 assert((vectorWidth == 128 || vectorWidth == 256) &&(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5025, __PRETTY_FUNCTION__))
5025 "Unsupported vector width")(((vectorWidth == 128 || vectorWidth == 256) && "Unsupported vector width"
) ? static_cast<void> (0) : __assert_fail ("(vectorWidth == 128 || vectorWidth == 256) && \"Unsupported vector width\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5025, __PRETTY_FUNCTION__))
;
5026 // Inserting UNDEF is Result
5027 if (Vec.isUndef())
5028 return Result;
5029 EVT VT = Vec.getValueType();
5030 EVT ElVT = VT.getVectorElementType();
5031 EVT ResultVT = Result.getValueType();
5032
5033 // Insert the relevant vectorWidth bits.
5034 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5035 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"
) ? static_cast<void> (0) : __assert_fail ("isPowerOf2_32(ElemsPerChunk) && \"Elements per chunk not power of 2\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5035, __PRETTY_FUNCTION__))
;
5036
5037 // This is the index of the first element of the vectorWidth-bit chunk
5038 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5039 IdxVal &= ~(ElemsPerChunk - 1);
5040
5041 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5042 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5043}
5044
5045/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5046/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5047/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5048/// simple superregister reference. Idx is an index in the 128 bits
5049/// we want. It need not be aligned to a 128-bit boundary. That makes
5050/// lowering INSERT_VECTOR_ELT operations easier.
5051static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5052 SelectionDAG &DAG, const SDLoc &dl) {
5053 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((Vec.getValueType().is128BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is128BitVector() && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5053, __PRETTY_FUNCTION__))
;
5054 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5055}
5056
5057static SDValue insert256BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5058 SelectionDAG &DAG, const SDLoc &dl) {
5059 assert(Vec.getValueType().is256BitVector() && "Unexpected vector size!")((Vec.getValueType().is256BitVector() && "Unexpected vector size!"
) ? static_cast<void> (0) : __assert_fail ("Vec.getValueType().is256BitVector() && \"Unexpected vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5059, __PRETTY_FUNCTION__))
;
5060 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 256);
5061}
5062
5063/// Insert i1-subvector to i1-vector.
5064static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
5065 const X86Subtarget &Subtarget) {
5066
5067 SDLoc dl(Op);
5068 SDValue Vec = Op.getOperand(0);
5069 SDValue SubVec = Op.getOperand(1);
5070 SDValue Idx = Op.getOperand(2);
5071
5072 if (!isa<ConstantSDNode>(Idx))
5073 return SDValue();
5074
5075 unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
5076 if (IdxVal == 0 && Vec.isUndef()) // the operation is legal
5077 return Op;
5078
5079 MVT OpVT = Op.getSimpleValueType();
5080 MVT SubVecVT = SubVec.getSimpleValueType();
5081 unsigned NumElems = OpVT.getVectorNumElements();
5082 unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
5083
5084 assert(IdxVal + SubVecNumElems <= NumElems &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5086, __PRETTY_FUNCTION__))
5085 IdxVal % SubVecVT.getSizeInBits() == 0 &&((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5086, __PRETTY_FUNCTION__))
5086 "Unexpected index value in INSERT_SUBVECTOR")((IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT
.getSizeInBits() == 0 && "Unexpected index value in INSERT_SUBVECTOR"
) ? static_cast<void> (0) : __assert_fail ("IdxVal + SubVecNumElems <= NumElems && IdxVal % SubVecVT.getSizeInBits() == 0 && \"Unexpected index value in INSERT_SUBVECTOR\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5086, __PRETTY_FUNCTION__))
;
5087
5088 // There are 3 possible cases:
5089 // 1. Subvector should be inserted in the lower part (IdxVal == 0)
5090 // 2. Subvector should be inserted in the upper part
5091 // (IdxVal + SubVecNumElems == NumElems)
5092 // 3. Subvector should be inserted in the middle (for example v2i1
5093 // to v16i1, index 2)
5094
5095 // extend to natively supported kshift
5096 MVT MinVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
5097 MVT WideOpVT = OpVT;
5098 if (OpVT.getSizeInBits() < MinVT.getStoreSizeInBits())
5099 WideOpVT = MinVT;
5100
5101 SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);
5102 SDValue Undef = DAG.getUNDEF(WideOpVT);
5103 SDValue WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5104 Undef, SubVec, ZeroIdx);
5105
5106 // Extract sub-vector if require.
5107 auto ExtractSubVec = [&](SDValue V) {
5108 return (WideOpVT == OpVT) ? V : DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
5109 OpVT, V, ZeroIdx);
5110 };
5111
5112 if (Vec.isUndef()) {
5113 if (IdxVal != 0) {
5114 SDValue ShiftBits = DAG.getConstant(IdxVal, dl, MVT::i8);
5115 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5116 ShiftBits);
5117 }
5118 return ExtractSubVec(WideSubVec);
5119 }
5120
5121 if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
5122 NumElems = WideOpVT.getVectorNumElements();
5123 unsigned ShiftLeft = NumElems - SubVecNumElems;
5124 unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
5125 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5126 DAG.getConstant(ShiftLeft, dl, MVT::i8));
5127 Vec = ShiftRight ? DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
5128 DAG.getConstant(ShiftRight, dl, MVT::i8)) : Vec;
5129 return ExtractSubVec(Vec);
5130 }
5131
5132 if (IdxVal == 0) {
5133 // Zero lower bits of the Vec
5134 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5135 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5136 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5137 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5138 // Merge them together, SubVec should be zero extended.
5139 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
5140 getZeroVector(WideOpVT, Subtarget, DAG, dl),
5141 SubVec, ZeroIdx);
5142 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5143 return ExtractSubVec(Vec);
5144 }
5145
5146 // Simple case when we put subvector in the upper part
5147 if (IdxVal + SubVecNumElems == NumElems) {
5148 // Zero upper bits of the Vec
5149 WideSubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, WideSubVec,
5150 DAG.getConstant(IdxVal, dl, MVT::i8));
5151 SDValue ShiftBits = DAG.getConstant(SubVecNumElems, dl, MVT::i8);
5152 Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);
5153 Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
5154 Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
5155 Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, WideSubVec);
5156 return ExtractSubVec(Vec);
5157 }
5158 // Subvector should be inserted in the middle - use shuffle
5159 WideSubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT, Undef,
5160 SubVec, ZeroIdx);
5161 SmallVector<int, 64> Mask;
5162 for (unsigned i = 0; i < NumElems; ++i)
5163 Mask.push_back(i >= IdxVal && i < IdxVal + SubVecNumElems ?
5164 i : i + NumElems);
5165 return DAG.getVectorShuffle(OpVT, dl, WideSubVec, Vec, Mask);
5166}
5167
5168/// Concat two 128-bit vectors into a 256 bit vector using VINSERTF128
5169/// instructions. This is used because creating CONCAT_VECTOR nodes of
5170/// BUILD_VECTORS returns a larger BUILD_VECTOR while we're trying to lower
5171/// large BUILD_VECTORS.
5172static SDValue concat128BitVectors(SDValue V1, SDValue V2, EVT VT,
5173 unsigned NumElems, SelectionDAG &DAG,
5174 const SDLoc &dl) {
5175 SDValue V = insert128BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5176 return insert128BitVector(V, V2, NumElems / 2, DAG, dl);
5177}
5178
5179static SDValue concat256BitVectors(SDValue V1, SDValue V2, EVT VT,
5180 unsigned NumElems, SelectionDAG &DAG,
5181 const SDLoc &dl) {
5182 SDValue V = insert256BitVector(DAG.getUNDEF(VT), V1, 0, DAG, dl);
5183 return insert256BitVector(V, V2, NumElems / 2, DAG, dl);
5184}
5185
5186/// Returns a vector of specified type with all bits set.
5187/// Always build ones vectors as <4 x i32>, <8 x i32> or <16 x i32>.
5188/// Then bitcast to their original type, ensuring they get CSE'd.
5189static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5190 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5191, __PRETTY_FUNCTION__))
5191 "Expected a 128/256/512-bit vector type")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected a 128/256/512-bit vector type") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected a 128/256/512-bit vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5191, __PRETTY_FUNCTION__))
;
5192
5193 APInt Ones = APInt::getAllOnesValue(32);
5194 unsigned NumElts = VT.getSizeInBits() / 32;
5195 SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts));
5196 return DAG.getBitcast(VT, Vec);
5197}
5198
5199static SDValue getExtendInVec(unsigned Opc, const SDLoc &DL, EVT VT, SDValue In,
5200 SelectionDAG &DAG) {
5201 EVT InVT = In.getValueType();
5202 assert((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode")(((X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && "Unexpected opcode"
) ? static_cast<void> (0) : __assert_fail ("(X86ISD::VSEXT == Opc || X86ISD::VZEXT == Opc) && \"Unexpected opcode\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5202, __PRETTY_FUNCTION__))
;
5203
5204 if (VT.is128BitVector() && InVT.is128BitVector())
5205 return X86ISD::VSEXT == Opc ? DAG.getSignExtendVectorInReg(In, DL, VT)
5206 : DAG.getZeroExtendVectorInReg(In, DL, VT);
5207
5208 // For 256-bit vectors, we only need the lower (128-bit) input half.
5209 // For 512-bit vectors, we only need the lower input half or quarter.
5210 if (VT.getSizeInBits() > 128 && InVT.getSizeInBits() > 128) {
5211 int Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
5212 In = extractSubVector(In, 0, DAG, DL,
5213 std::max(128, (int)VT.getSizeInBits() / Scale));
5214 }
5215
5216 return DAG.getNode(Opc, DL, VT, In);
5217}
5218
5219/// Generate unpacklo/unpackhi shuffle mask.
5220static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
5221 bool Unary) {
5222 assert(Mask.empty() && "Expected an empty shuffle mask vector")((Mask.empty() && "Expected an empty shuffle mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"Expected an empty shuffle mask vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5222, __PRETTY_FUNCTION__))
;
5223 int NumElts = VT.getVectorNumElements();
5224 int NumEltsInLane = 128 / VT.getScalarSizeInBits();
5225
5226 for (int i = 0; i < NumElts; ++i) {
5227 unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
5228 int Pos = (i % NumEltsInLane) / 2 + LaneStart;
5229 Pos += (Unary ? 0 : NumElts * (i % 2));
5230 Pos += (Lo ? 0 : NumEltsInLane / 2);
5231 Mask.push_back(Pos);
5232 }
5233}
5234
5235/// Returns a vector_shuffle node for an unpackl operation.
5236static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5237 SDValue V1, SDValue V2) {
5238 SmallVector<int, 8> Mask;
5239 createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
5240 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5241}
5242
5243/// Returns a vector_shuffle node for an unpackh operation.
5244static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
5245 SDValue V1, SDValue V2) {
5246 SmallVector<int, 8> Mask;
5247 createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
5248 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
5249}
5250
5251/// Return a vector_shuffle of the specified vector of zero or undef vector.
5252/// This produces a shuffle where the low element of V2 is swizzled into the
5253/// zero/undef vector, landing at element Idx.
5254/// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3).
5255static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx,
5256 bool IsZero,
5257 const X86Subtarget &Subtarget,
5258 SelectionDAG &DAG) {
5259 MVT VT = V2.getSimpleValueType();
5260 SDValue V1 = IsZero
5261 ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT);
5262 int NumElems = VT.getVectorNumElements();
5263 SmallVector<int, 16> MaskVec(NumElems);
5264 for (int i = 0; i != NumElems; ++i)
5265 // If this is the insertion idx, put the low elt of V2 here.
5266 MaskVec[i] = (i == Idx) ? NumElems : i;
5267 return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec);
5268}
5269
5270static SDValue peekThroughBitcasts(SDValue V) {
5271 while (V.getNode() && V.getOpcode() == ISD::BITCAST)
5272 V = V.getOperand(0);
5273 return V;
5274}
5275
5276static SDValue peekThroughOneUseBitcasts(SDValue V) {
5277 while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
5278 V.getOperand(0).hasOneUse())
5279 V = V.getOperand(0);
5280 return V;
5281}
5282
5283static const Constant *getTargetConstantFromNode(SDValue Op) {
5284 Op = peekThroughBitcasts(Op);
5285
5286 auto *Load = dyn_cast<LoadSDNode>(Op);
5287 if (!Load)
5288 return nullptr;
5289
5290 SDValue Ptr = Load->getBasePtr();
5291 if (Ptr->getOpcode() == X86ISD::Wrapper ||
5292 Ptr->getOpcode() == X86ISD::WrapperRIP)
5293 Ptr = Ptr->getOperand(0);
5294
5295 auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
5296 if (!CNode || CNode->isMachineConstantPoolEntry())
5297 return nullptr;
5298
5299 return dyn_cast<Constant>(CNode->getConstVal());
5300}
5301
5302// Extract raw constant bits from constant pools.
5303static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
5304 APInt &UndefElts,
5305 SmallVectorImpl<APInt> &EltBits,
5306 bool AllowWholeUndefs = true,
5307 bool AllowPartialUndefs = true) {
5308 assert(EltBits.empty() && "Expected an empty EltBits vector")((EltBits.empty() && "Expected an empty EltBits vector"
) ? static_cast<void> (0) : __assert_fail ("EltBits.empty() && \"Expected an empty EltBits vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5308, __PRETTY_FUNCTION__))
;
5309
5310 Op = peekThroughBitcasts(Op);
5311
5312 EVT VT = Op.getValueType();
5313 unsigned SizeInBits = VT.getSizeInBits();
5314 assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!")(((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"
) ? static_cast<void> (0) : __assert_fail ("(SizeInBits % EltSizeInBits) == 0 && \"Can't split constant!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5314, __PRETTY_FUNCTION__))
;
5315 unsigned NumElts = SizeInBits / EltSizeInBits;
5316
5317 unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
5318 unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;
5319
5320 // Extract all the undef/constant element data and pack into single bitsets.
5321 APInt UndefBits(SizeInBits, 0);
5322 APInt MaskBits(SizeInBits, 0);
5323
5324 // Split the undef/constant single bitset data into the target elements.
5325 auto SplitBitData = [&]() {
5326 // Don't split if we don't allow undef bits.
5327 bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
5328 if (UndefBits.getBoolValue() && !AllowUndefs)
5329 return false;
5330
5331 UndefElts = APInt(NumElts, 0);
5332 EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
5333
5334 for (unsigned i = 0; i != NumElts; ++i) {
5335 unsigned BitOffset = i * EltSizeInBits;
5336 APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);
5337
5338 // Only treat an element as UNDEF if all bits are UNDEF.
5339 if (UndefEltBits.isAllOnesValue()) {
5340 if (!AllowWholeUndefs)
5341 return false;
5342 UndefElts.setBit(i);
5343 continue;
5344 }
5345
5346 // If only some bits are UNDEF then treat them as zero (or bail if not
5347 // supported).
5348 if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
5349 return false;
5350
5351 APInt Bits = MaskBits.extractBits(EltSizeInBits, BitOffset);
5352 EltBits[i] = Bits.getZExtValue();
5353 }
5354 return true;
5355 };
5356
5357 // Collect constant bits and insert into mask/undef bit masks.
5358 auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
5359 unsigned BitOffset) {
5360 if (!Cst)
5361 return false;
5362 if (isa<UndefValue>(Cst)) {
5363 unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
5364 Undefs.setBits(BitOffset, BitOffset + CstSizeInBits);
5365 return true;
5366 }
5367 if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
5368 Mask.insertBits(CInt->getValue(), BitOffset);
5369 return true;
5370 }
5371 if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
5372 Mask.insertBits(CFP->getValueAPF().bitcastToAPInt(), BitOffset);
5373 return true;
5374 }
5375 return false;
5376 };
5377
5378 // Extract constant bits from build vector.
5379 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
5380 for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
5381 const SDValue &Src = Op.getOperand(i);
5382 unsigned BitOffset = i * SrcEltSizeInBits;
5383 if (Src.isUndef()) {
5384 UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
5385 continue;
5386 }
5387 auto *Cst = cast<ConstantSDNode>(Src);
5388 APInt Bits = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5389 MaskBits.insertBits(Bits, BitOffset);
5390 }
5391 return SplitBitData();
5392 }
5393
5394 // Extract constant bits from constant pool vector.
5395 if (auto *Cst = getTargetConstantFromNode(Op)) {
5396 Type *CstTy = Cst->getType();
5397 if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
5398 return false;
5399
5400 unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
5401 for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i)
5402 if (!CollectConstantBits(Cst->getAggregateElement(i), MaskBits, UndefBits,
5403 i * CstEltSizeInBits))
5404 return false;
5405
5406 return SplitBitData();
5407 }
5408
5409 // Extract constant bits from a broadcasted constant pool scalar.
5410 if (Op.getOpcode() == X86ISD::VBROADCAST &&
5411 EltSizeInBits <= SrcEltSizeInBits) {
5412 if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
5413 APInt Bits(SizeInBits, 0);
5414 APInt Undefs(SizeInBits, 0);
5415 if (CollectConstantBits(Broadcast, Bits, Undefs, 0)) {
5416 for (unsigned i = 0; i != NumSrcElts; ++i) {
5417 MaskBits |= Bits.shl(i * SrcEltSizeInBits);
5418 UndefBits |= Undefs.shl(i * SrcEltSizeInBits);
5419 }
5420 return SplitBitData();
5421 }
5422 }
5423 }
5424
5425 // Extract a rematerialized scalar constant insertion.
5426 if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
5427 Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
5428 isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
5429 auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
5430 MaskBits = CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
5431 MaskBits = MaskBits.zext(SizeInBits);
5432 return SplitBitData();
5433 }
5434
5435 return false;
5436}
5437
5438static bool getTargetShuffleMaskIndices(SDValue MaskNode,
5439 unsigned MaskEltSizeInBits,
5440 SmallVectorImpl<uint64_t> &RawMask) {
5441 APInt UndefElts;
5442 SmallVector<APInt, 64> EltBits;
5443
5444 // Extract the raw target constant bits.
5445 // FIXME: We currently don't support UNDEF bits or mask entries.
5446 if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts,
5447 EltBits, /* AllowWholeUndefs */ false,
5448 /* AllowPartialUndefs */ false))
5449 return false;
5450
5451 // Insert the extracted elements into the mask.
5452 for (APInt Elt : EltBits)
5453 RawMask.push_back(Elt.getZExtValue());
5454
5455 return true;
5456}
5457
5458/// Calculates the shuffle mask corresponding to the target-specific opcode.
5459/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
5460/// operands in \p Ops, and returns true.
5461/// Sets \p IsUnary to true if only one source is used. Note that this will set
5462/// IsUnary for shuffles which use a single input multiple times, and in those
5463/// cases it will adjust the mask to only have indices within that single input.
5464/// It is an error to call this with non-empty Mask/Ops vectors.
5465static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
5466 SmallVectorImpl<SDValue> &Ops,
5467 SmallVectorImpl<int> &Mask, bool &IsUnary) {
5468 unsigned NumElems = VT.getVectorNumElements();
5469 SDValue ImmN;
5470
5471 assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector")((Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"
) ? static_cast<void> (0) : __assert_fail ("Mask.empty() && \"getTargetShuffleMask expects an empty Mask vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5471, __PRETTY_FUNCTION__))
;
5472 assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector")((Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"
) ? static_cast<void> (0) : __assert_fail ("Ops.empty() && \"getTargetShuffleMask expects an empty Ops vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5472, __PRETTY_FUNCTION__))
;
5473
5474 IsUnary = false;
5475 bool IsFakeUnary = false;
5476 switch(N->getOpcode()) {
5477 case X86ISD::BLENDI:
5478 ImmN = N->getOperand(N->getNumOperands()-1);
5479 DecodeBLENDMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5480 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5481 break;
5482 case X86ISD::SHUFP:
5483 ImmN = N->getOperand(N->getNumOperands()-1);
5484 DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5485 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5486 break;
5487 case X86ISD::INSERTPS:
5488 ImmN = N->getOperand(N->getNumOperands()-1);
5489 DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5490 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5491 break;
5492 case X86ISD::UNPCKH:
5493 DecodeUNPCKHMask(VT, Mask);
5494 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5495 break;
5496 case X86ISD::UNPCKL:
5497 DecodeUNPCKLMask(VT, Mask);
5498 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5499 break;
5500 case X86ISD::MOVHLPS:
5501 DecodeMOVHLPSMask(NumElems, Mask);
5502 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5503 break;
5504 case X86ISD::MOVLHPS:
5505 DecodeMOVLHPSMask(NumElems, Mask);
5506 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5507 break;
5508 case X86ISD::PALIGNR:
5509 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5509, __PRETTY_FUNCTION__))
;
5510 ImmN = N->getOperand(N->getNumOperands()-1);
5511 DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5512 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5513 Ops.push_back(N->getOperand(1));
5514 Ops.push_back(N->getOperand(0));
5515 break;
5516 case X86ISD::VSHLDQ:
5517 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5517, __PRETTY_FUNCTION__))
;
5518 ImmN = N->getOperand(N->getNumOperands() - 1);
5519 DecodePSLLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5520 IsUnary = true;
5521 break;
5522 case X86ISD::VSRLDQ:
5523 assert(VT.getScalarType() == MVT::i8 && "Byte vector expected")((VT.getScalarType() == MVT::i8 && "Byte vector expected"
) ? static_cast<void> (0) : __assert_fail ("VT.getScalarType() == MVT::i8 && \"Byte vector expected\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5523, __PRETTY_FUNCTION__))
;
5524 ImmN = N->getOperand(N->getNumOperands() - 1);
5525 DecodePSRLDQMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5526 IsUnary = true;
5527 break;
5528 case X86ISD::PSHUFD:
5529 case X86ISD::VPERMILPI:
5530 ImmN = N->getOperand(N->getNumOperands()-1);
5531 DecodePSHUFMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5532 IsUnary = true;
5533 break;
5534 case X86ISD::PSHUFHW:
5535 ImmN = N->getOperand(N->getNumOperands()-1);
5536 DecodePSHUFHWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5537 IsUnary = true;
5538 break;
5539 case X86ISD::PSHUFLW:
5540 ImmN = N->getOperand(N->getNumOperands()-1);
5541 DecodePSHUFLWMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5542 IsUnary = true;
5543 break;
5544 case X86ISD::VZEXT_MOVL:
5545 DecodeZeroMoveLowMask(VT, Mask);
5546 IsUnary = true;
5547 break;
5548 case X86ISD::VBROADCAST: {
5549 SDValue N0 = N->getOperand(0);
5550 // See if we're broadcasting from index 0 of an EXTRACT_SUBVECTOR. If so,
5551 // add the pre-extracted value to the Ops vector.
5552 if (N0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
5553 N0.getOperand(0).getValueType() == VT &&
5554 N0.getConstantOperandVal(1) == 0)
5555 Ops.push_back(N0.getOperand(0));
5556
5557 // We only decode broadcasts of same-sized vectors, unless the broadcast
5558 // came from an extract from the original width. If we found one, we
5559 // pushed it the Ops vector above.
5560 if (N0.getValueType() == VT || !Ops.empty()) {
5561 DecodeVectorBroadcast(VT, Mask);
5562 IsUnary = true;
5563 break;
5564 }
5565 return false;
5566 }
5567 case X86ISD::VPERMILPV: {
5568 IsUnary = true;
5569 SDValue MaskNode = N->getOperand(1);
5570 unsigned MaskEltSize = VT.getScalarSizeInBits();
5571 SmallVector<uint64_t, 32> RawMask;
5572 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5573 DecodeVPERMILPMask(VT, RawMask, Mask);
5574 break;
5575 }
5576 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5577 DecodeVPERMILPMask(C, MaskEltSize, Mask);
5578 break;
5579 }
5580 return false;
5581 }
5582 case X86ISD::PSHUFB: {
5583 IsUnary = true;
5584 SDValue MaskNode = N->getOperand(1);
5585 SmallVector<uint64_t, 32> RawMask;
5586 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5587 DecodePSHUFBMask(RawMask, Mask);
5588 break;
5589 }
5590 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5591 DecodePSHUFBMask(C, Mask);
5592 break;
5593 }
5594 return false;
5595 }
5596 case X86ISD::VPERMI:
5597 ImmN = N->getOperand(N->getNumOperands()-1);
5598 DecodeVPERMMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5599 IsUnary = true;
5600 break;
5601 case X86ISD::MOVSS:
5602 case X86ISD::MOVSD:
5603 DecodeScalarMoveMask(VT, /* IsLoad */ false, Mask);
5604 break;
5605 case X86ISD::VPERM2X128:
5606 ImmN = N->getOperand(N->getNumOperands()-1);
5607 DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
5608 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5609 break;
5610 case X86ISD::MOVSLDUP:
5611 DecodeMOVSLDUPMask(VT, Mask);
5612 IsUnary = true;
5613 break;
5614 case X86ISD::MOVSHDUP:
5615 DecodeMOVSHDUPMask(VT, Mask);
5616 IsUnary = true;
5617 break;
5618 case X86ISD::MOVDDUP:
5619 DecodeMOVDDUPMask(VT, Mask);
5620 IsUnary = true;
5621 break;
5622 case X86ISD::MOVLHPD:
5623 case X86ISD::MOVLPD:
5624 case X86ISD::MOVLPS:
5625 // Not yet implemented
5626 return false;
5627 case X86ISD::VPERMIL2: {
5628 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5629 unsigned MaskEltSize = VT.getScalarSizeInBits();
5630 SDValue MaskNode = N->getOperand(2);
5631 SDValue CtrlNode = N->getOperand(3);
5632 if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
5633 unsigned CtrlImm = CtrlOp->getZExtValue();
5634 SmallVector<uint64_t, 32> RawMask;
5635 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5636 DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
5637 break;
5638 }
5639 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5640 DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
5641 break;
5642 }
5643 }
5644 return false;
5645 }
5646 case X86ISD::VPPERM: {
5647 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
5648 SDValue MaskNode = N->getOperand(2);
5649 SmallVector<uint64_t, 32> RawMask;
5650 if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask)) {
5651 DecodeVPPERMMask(RawMask, Mask);
5652 break;
5653 }
5654 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5655 DecodeVPPERMMask(C, Mask);
5656 break;
5657 }
5658 return false;
5659 }
5660 case X86ISD::VPERMV: {
5661 IsUnary = true;
5662 // Unlike most shuffle nodes, VPERMV's mask operand is operand 0.
5663 Ops.push_back(N->getOperand(1));
5664 SDValue MaskNode = N->getOperand(0);
5665 SmallVector<uint64_t, 32> RawMask;
5666 unsigned MaskEltSize = VT.getScalarSizeInBits();
5667 if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask)) {
5668 DecodeVPERMVMask(RawMask, Mask);
5669 break;
5670 }
5671 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5672 DecodeVPERMVMask(C, MaskEltSize, Mask);
5673 break;
5674 }
5675 return false;
5676 }
5677 case X86ISD::VPERMV3: {
5678 IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2);
5679 // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one.
5680 Ops.push_back(N->getOperand(0));
5681 Ops.push_back(N->getOperand(2));
5682 SDValue MaskNode = N->getOperand(1);
5683 unsigned MaskEltSize = VT.getScalarSizeInBits();
5684 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5685 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5686 break;
5687 }
5688 return false;
5689 }
5690 case X86ISD::VPERMIV3: {
5691 IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
5692 // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
5693 Ops.push_back(N->getOperand(1));
5694 Ops.push_back(N->getOperand(2));
5695 SDValue MaskNode = N->getOperand(0);
5696 unsigned MaskEltSize = VT.getScalarSizeInBits();
5697 if (auto *C = getTargetConstantFromNode(MaskNode)) {
5698 DecodeVPERMV3Mask(C, MaskEltSize, Mask);
5699 break;
5700 }
5701 return false;
5702 }
5703 default: llvm_unreachable("unknown target shuffle node")::llvm::llvm_unreachable_internal("unknown target shuffle node"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5703)
;
5704 }
5705
5706 // Empty mask indicates the decode failed.
5707 if (Mask.empty())
5708 return false;
5709
5710 // Check if we're getting a shuffle mask with zero'd elements.
5711 if (!AllowSentinelZero)
5712 if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
5713 return false;
5714
5715 // If we have a fake unary shuffle, the shuffle mask is spread across two
5716 // inputs that are actually the same node. Re-map the mask to always point
5717 // into the first input.
5718 if (IsFakeUnary)
5719 for (int &M : Mask)
5720 if (M >= (int)Mask.size())
5721 M -= Mask.size();
5722
5723 // If we didn't already add operands in the opcode-specific code, default to
5724 // adding 1 or 2 operands starting at 0.
5725 if (Ops.empty()) {
5726 Ops.push_back(N->getOperand(0));
5727 if (!IsUnary || IsFakeUnary)
5728 Ops.push_back(N->getOperand(1));
5729 }
5730
5731 return true;
5732}
5733
5734/// Check a target shuffle mask's inputs to see if we can set any values to
5735/// SM_SentinelZero - this is for elements that are known to be zero
5736/// (not just zeroable) from their inputs.
5737/// Returns true if the target shuffle mask was decoded.
5738static bool setTargetShuffleZeroElements(SDValue N,
5739 SmallVectorImpl<int> &Mask,
5740 SmallVectorImpl<SDValue> &Ops) {
5741 bool IsUnary;
5742 if (!isTargetShuffle(N.getOpcode()))
5743 return false;
5744
5745 MVT VT = N.getSimpleValueType();
5746 if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
5747 return false;
5748
5749 SDValue V1 = Ops[0];
5750 SDValue V2 = IsUnary ? V1 : Ops[1];
5751
5752 V1 = peekThroughBitcasts(V1);
5753 V2 = peekThroughBitcasts(V2);
5754
5755 assert((VT.getSizeInBits() % Mask.size()) == 0 &&(((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5756, __PRETTY_FUNCTION__))
5756 "Illegal split of shuffle value type")(((VT.getSizeInBits() % Mask.size()) == 0 && "Illegal split of shuffle value type"
) ? static_cast<void> (0) : __assert_fail ("(VT.getSizeInBits() % Mask.size()) == 0 && \"Illegal split of shuffle value type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5756, __PRETTY_FUNCTION__))
;
5757 unsigned EltSizeInBits = VT.getSizeInBits() / Mask.size();
5758
5759 // Extract known constant input data.
5760 APInt UndefSrcElts[2];
5761 SmallVector<APInt, 32> SrcEltBits[2];
5762 bool IsSrcConstant[2] = {
5763 getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
5764 SrcEltBits[0], true, false),
5765 getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
5766 SrcEltBits[1], true, false)};
5767
5768 for (int i = 0, Size = Mask.size(); i < Size; ++i) {
5769 int M = Mask[i];
5770
5771 // Already decoded as SM_SentinelZero / SM_SentinelUndef.
5772 if (M < 0)
5773 continue;
5774
5775 // Determine shuffle input and normalize the mask.
5776 unsigned SrcIdx = M / Size;
5777 SDValue V = M < Size ? V1 : V2;
5778 M %= Size;
5779
5780 // We are referencing an UNDEF input.
5781 if (V.isUndef()) {
5782 Mask[i] = SM_SentinelUndef;
5783 continue;
5784 }
5785
5786 // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
5787 // TODO: We currently only set UNDEF for integer types - floats use the same
5788 // registers as vectors and many of the scalar folded loads rely on the
5789 // SCALAR_TO_VECTOR pattern.
5790 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5791 (Size % V.getValueType().getVectorNumElements()) == 0) {
5792 int Scale = Size / V.getValueType().getVectorNumElements();
5793 int Idx = M / Scale;
5794 if (Idx != 0 && !VT.isFloatingPoint())
5795 Mask[i] = SM_SentinelUndef;
5796 else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
5797 Mask[i] = SM_SentinelZero;
5798 continue;
5799 }
5800
5801 // Attempt to extract from the source's constant bits.
5802 if (IsSrcConstant[SrcIdx]) {
5803 if (UndefSrcElts[SrcIdx][M])
5804 Mask[i] = SM_SentinelUndef;
5805 else if (SrcEltBits[SrcIdx][M] == 0)
5806 Mask[i] = SM_SentinelZero;
5807 }
5808 }
5809
5810 assert(VT.getVectorNumElements() == Mask.size() &&((VT.getVectorNumElements() == Mask.size() && "Different mask size from vector size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5811, __PRETTY_FUNCTION__))
5811 "Different mask size from vector size!")((VT.getVectorNumElements() == Mask.size() && "Different mask size from vector size!"
) ? static_cast<void> (0) : __assert_fail ("VT.getVectorNumElements() == Mask.size() && \"Different mask size from vector size!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5811, __PRETTY_FUNCTION__))
;
5812 return true;
5813}
5814
5815// Attempt to decode ops that could be represented as a shuffle mask.
5816// The decoded shuffle mask may contain a different number of elements to the
5817// destination value type.
5818static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
5819 SmallVectorImpl<SDValue> &Ops) {
5820 Mask.clear();
5821 Ops.clear();
5822
5823 MVT VT = N.getSimpleValueType();
5824 unsigned NumElts = VT.getVectorNumElements();
5825 unsigned NumSizeInBits = VT.getSizeInBits();
5826 unsigned NumBitsPerElt = VT.getScalarSizeInBits();
5827 assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&(((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0
&& "Expected byte aligned value types") ? static_cast
<void> (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5828, __PRETTY_FUNCTION__))
5828 "Expected byte aligned value types")(((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0
&& "Expected byte aligned value types") ? static_cast
<void> (0) : __assert_fail ("(NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 && \"Expected byte aligned value types\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5828, __PRETTY_FUNCTION__))
;
5829
5830 unsigned Opcode = N.getOpcode();
5831 switch (Opcode) {
5832 case ISD::AND:
5833 case X86ISD::ANDNP: {
5834 // Attempt to decode as a per-byte mask.
5835 APInt UndefElts;
5836 SmallVector<APInt, 32> EltBits;
5837 SDValue N0 = N.getOperand(0);
5838 SDValue N1 = N.getOperand(1);
5839 bool IsAndN = (X86ISD::ANDNP == Opcode);
5840 uint64_t ZeroMask = IsAndN ? 255 : 0;
5841 if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
5842 return false;
5843 for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
5844 if (UndefElts[i]) {
5845 Mask.push_back(SM_SentinelUndef);
5846 continue;
5847 }
5848 uint64_t ByteBits = EltBits[i].getZExtValue();
5849 if (ByteBits != 0 && ByteBits != 255)
5850 return false;
5851 Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
5852 }
5853 Ops.push_back(IsAndN ? N1 : N0);
5854 return true;
5855 }
5856 case ISD::SCALAR_TO_VECTOR: {
5857 // Match against a scalar_to_vector of an extract from a similar vector.
5858 SDValue N0 = N.getOperand(0);
5859 if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5860 N0.getOperand(0).getValueType() != VT ||
5861 !isa<ConstantSDNode>(N0.getOperand(1)) ||
5862 NumElts <= N0.getConstantOperandVal(1) ||
5863 !N->isOnlyUserOf(N0.getNode()))
5864 return false;
5865 Ops.push_back(N0.getOperand(0));
5866 Mask.push_back(N0.getConstantOperandVal(1));
5867 Mask.append(NumElts - 1, SM_SentinelUndef);
5868 return true;
5869 }
5870 case X86ISD::PINSRB:
5871 case X86ISD::PINSRW: {
5872 SDValue InVec = N.getOperand(0);
5873 SDValue InScl = N.getOperand(1);
5874 uint64_t InIdx = N.getConstantOperandVal(2);
5875 assert(InIdx < NumElts && "Illegal insertion index")((InIdx < NumElts && "Illegal insertion index") ? static_cast
<void> (0) : __assert_fail ("InIdx < NumElts && \"Illegal insertion index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5875, __PRETTY_FUNCTION__))
;
5876
5877 // Attempt to recognise a PINSR*(VEC, 0, Idx) shuffle pattern.
5878 if (X86::isZeroNode(InScl)) {
5879 Ops.push_back(InVec);
5880 for (unsigned i = 0; i != NumElts; ++i)
5881 Mask.push_back(i == InIdx ? SM_SentinelZero : (int)i);
5882 return true;
5883 }
5884
5885 // Attempt to recognise a PINSR*(ASSERTZEXT(PEXTR*)) shuffle pattern.
5886 // TODO: Expand this to support INSERT_VECTOR_ELT/etc.
5887 unsigned ExOp =
5888 (X86ISD::PINSRB == Opcode ? X86ISD::PEXTRB : X86ISD::PEXTRW);
5889 if (InScl.getOpcode() != ISD::AssertZext ||
5890 InScl.getOperand(0).getOpcode() != ExOp)
5891 return false;
5892
5893 SDValue ExVec = InScl.getOperand(0).getOperand(0);
5894 uint64_t ExIdx = InScl.getOperand(0).getConstantOperandVal(1);
5895 assert(ExIdx < NumElts && "Illegal extraction index")((ExIdx < NumElts && "Illegal extraction index") ?
static_cast<void> (0) : __assert_fail ("ExIdx < NumElts && \"Illegal extraction index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 5895, __PRETTY_FUNCTION__))
;
5896 Ops.push_back(InVec);
5897 Ops.push_back(ExVec);
5898 for (unsigned i = 0; i != NumElts; ++i)
5899 Mask.push_back(i == InIdx ? NumElts + ExIdx : i);
5900 return true;
5901 }
5902 case X86ISD::VSHLI:
5903 case X86ISD::VSRLI: {
5904 uint64_t ShiftVal = N.getConstantOperandVal(1);
5905 // Out of range bit shifts are guaranteed to be zero.
5906 if (NumBitsPerElt <= ShiftVal) {
5907 Mask.append(NumElts, SM_SentinelZero);
5908 return true;
5909 }
5910
5911 // We can only decode 'whole byte' bit shifts as shuffles.
5912 if ((ShiftVal % 8) != 0)
5913 break;
5914
5915 uint64_t ByteShift = ShiftVal / 8;
5916 unsigned NumBytes = NumSizeInBits / 8;
5917 unsigned NumBytesPerElt = NumBitsPerElt / 8;
5918 Ops.push_back(N.getOperand(0));
5919
5920 // Clear mask to all zeros and insert the shifted byte indices.
5921 Mask.append(NumBytes, SM_SentinelZero);
5922
5923 if (X86ISD::VSHLI == Opcode) {
5924 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
5925 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
5926 Mask[i + j] = i + j - ByteShift;
5927 } else {
5928 for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
5929 for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
5930 Mask[i + j - ByteShift] = i + j;
5931 }
5932 return true;
5933 }
5934 case ISD::ZERO_EXTEND_VECTOR_INREG:
5935 case X86ISD::VZEXT: {
5936 // TODO - add support for VPMOVZX with smaller input vector types.
5937 SDValue Src = N.getOperand(0);
5938 MVT SrcVT = Src.getSimpleValueType();
5939 if (NumSizeInBits != SrcVT.getSizeInBits())
5940 break;
5941 DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
5942 Ops.push_back(Src);
5943 return true;
5944 }
5945 }
5946
5947 return false;
5948}
5949
5950/// Removes unused shuffle source inputs and adjusts the shuffle mask accordingly.
5951static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
5952 SmallVectorImpl<int> &Mask) {
5953 int MaskWidth = Mask.size();
5954 SmallVector<SDValue, 16> UsedInputs;
5955 for (int i = 0, e = Inputs.size(); i < e; ++i) {
5956 int lo = UsedInputs.size() * MaskWidth;
5957 int hi = lo + MaskWidth;
5958 if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
5959 UsedInputs.push_back(Inputs[i]);
5960 continue;
5961 }
5962 for (int &M : Mask)
5963 if (lo <= M)
5964 M -= MaskWidth;
5965 }
5966 Inputs = UsedInputs;
5967}
5968
5969/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
5970/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
5971/// remaining input indices in case we now have a unary shuffle and adjust the
5972/// inputs accordingly.
5973/// Returns true if the target shuffle mask was decoded.
5974static bool resolveTargetShuffleInputs(SDValue Op,
5975 SmallVectorImpl<SDValue> &Inputs,
5976 SmallVectorImpl<int> &Mask) {
5977 if (!setTargetShuffleZeroElements(Op, Mask, Inputs))
5978 if (!getFauxShuffleMask(Op, Mask, Inputs))
5979 return false;
5980
5981 resolveTargetShuffleInputsAndMask(Inputs, Mask);
5982 return true;
5983}
5984
5985/// Returns the scalar element that will make up the ith
5986/// element of the result of the vector shuffle.
5987static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
5988 unsigned Depth) {
5989 if (Depth == 6)
5990 return SDValue(); // Limit search depth.
5991
5992 SDValue V = SDValue(N, 0);
5993 EVT VT = V.getValueType();
5994 unsigned Opcode = V.getOpcode();
5995
5996 // Recurse into ISD::VECTOR_SHUFFLE node to find scalars.
5997 if (const ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(N)) {
5998 int Elt = SV->getMaskElt(Index);
5999
6000 if (Elt < 0)
6001 return DAG.getUNDEF(VT.getVectorElementType());
6002
6003 unsigned NumElems = VT.getVectorNumElements();
6004 SDValue NewV = (Elt < (int)NumElems) ? SV->getOperand(0)
6005 : SV->getOperand(1);
6006 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG, Depth+1);
6007 }
6008
6009 // Recurse into target specific vector shuffles to find scalars.
6010 if (isTargetShuffle(Opcode)) {
6011 MVT ShufVT = V.getSimpleValueType();
6012 MVT ShufSVT = ShufVT.getVectorElementType();
6013 int NumElems = (int)ShufVT.getVectorNumElements();
6014 SmallVector<int, 16> ShuffleMask;
6015 SmallVector<SDValue, 16> ShuffleOps;
6016 bool IsUnary;
6017
6018 if (!getTargetShuffleMask(N, ShufVT, true, ShuffleOps, ShuffleMask, IsUnary))
6019 return SDValue();
6020
6021 int Elt = ShuffleMask[Index];
6022 if (Elt == SM_SentinelZero)
6023 return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(N), ShufSVT)
6024 : DAG.getConstantFP(+0.0, SDLoc(N), ShufSVT);
6025 if (Elt == SM_SentinelUndef)
6026 return DAG.getUNDEF(ShufSVT);
6027
6028 assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range")((0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range"
) ? static_cast<void> (0) : __assert_fail ("0 <= Elt && Elt < (2*NumElems) && \"Shuffle index out of range\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6028, __PRETTY_FUNCTION__))
;
6029 SDValue NewV = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1];
6030 return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
6031 Depth+1);
6032 }
6033
6034 // Actual nodes that may contain scalar elements
6035 if (Opcode == ISD::BITCAST) {
6036 V = V.getOperand(0);
6037 EVT SrcVT = V.getValueType();
6038 unsigned NumElems = VT.getVectorNumElements();
6039
6040 if (!SrcVT.isVector() || SrcVT.getVectorNumElements() != NumElems)
6041 return SDValue();
6042 }
6043
6044 if (V.getOpcode() == ISD::SCALAR_TO_VECTOR)
6045 return (Index == 0) ? V.getOperand(0)
6046 : DAG.getUNDEF(VT.getVectorElementType());
6047
6048 if (V.getOpcode() == ISD::BUILD_VECTOR)
6049 return V.getOperand(Index);
6050
6051 return SDValue();
6052}
6053
6054/// Custom lower build_vector of v16i8.
6055static SDValue LowerBuildVectorv16i8(SDValue Op, unsigned NonZeros,
6056 unsigned NumNonZero, unsigned NumZero,
6057 SelectionDAG &DAG,
6058 const X86Subtarget &Subtarget) {
6059 if (NumNonZero > 8 && !Subtarget.hasSSE41())
6060 return SDValue();
6061
6062 SDLoc dl(Op);
6063 SDValue V;
6064 bool First = true;
6065
6066 // SSE4.1 - use PINSRB to insert each byte directly.
6067 if (Subtarget.hasSSE41()) {
6068 for (unsigned i = 0; i < 16; ++i) {
6069 bool IsNonZero = (NonZeros & (1 << i)) != 0;
6070 if (IsNonZero) {
6071 // If the build vector contains zeros or our first insertion is not the
6072 // first index then insert into zero vector to break any register
6073 // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
6074 if (First) {
6075 First = false;
6076 if (NumZero || 0 != i)
6077 V = getZeroVector(MVT::v16i8, Subtarget, DAG, dl);
6078 else {
6079 assert(0 == i && "Expected insertion into zero-index")((0 == i && "Expected insertion into zero-index") ? static_cast
<void> (0) : __assert_fail ("0 == i && \"Expected insertion into zero-index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6079, __PRETTY_FUNCTION__))
;
6080 V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
6081 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6082 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6083 V = DAG.getBitcast(MVT::v16i8, V);
6084 continue;
6085 }
6086 }
6087 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v16i8, V,
6088 Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
6089 }
6090 }
6091
6092 return V;
6093 }
6094
6095 // Pre-SSE4.1 - merge byte pairs and insert with PINSRW.
6096 for (unsigned i = 0; i < 16; ++i) {
6097 bool ThisIsNonZero = (NonZeros & (1 << i)) != 0;
6098 if (ThisIsNonZero && First) {
6099 if (NumZero)
6100 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
6101 else
6102 V = DAG.getUNDEF(MVT::v8i16);
6103 First = false;
6104 }
6105
6106 if ((i & 1) != 0) {
6107 // FIXME: Investigate extending to i32 instead of just i16.
6108 // FIXME: Investigate combining the first 4 bytes as a i32 instead.
6109 SDValue ThisElt, LastElt;
6110 bool LastIsNonZero = (NonZeros & (1 << (i - 1))) != 0;
6111 if (LastIsNonZero) {
6112 LastElt =
6113 DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i - 1));
6114 }
6115 if (ThisIsNonZero) {
6116 ThisElt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, Op.getOperand(i));
6117 ThisElt = DAG.getNode(ISD::SHL, dl, MVT::i16, ThisElt,
6118 DAG.getConstant(8, dl, MVT::i8));
6119 if (LastIsNonZero)
6120 ThisElt = DAG.getNode(ISD::OR, dl, MVT::i16, ThisElt, LastElt);
6121 } else
6122 ThisElt = LastElt;
6123
6124 if (ThisElt) {
6125 if (1 == i) {
6126 V = NumZero ? DAG.getZExtOrTrunc(ThisElt, dl, MVT::i32)
6127 : DAG.getAnyExtOrTrunc(ThisElt, dl, MVT::i32);
6128 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6129 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6130 V = DAG.getBitcast(MVT::v8i16, V);
6131 } else {
6132 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, ThisElt,
6133 DAG.getIntPtrConstant(i / 2, dl));
6134 }
6135 }
6136 }
6137 }
6138
6139 return DAG.getBitcast(MVT::v16i8, V);
6140}
6141
6142/// Custom lower build_vector of v8i16.
6143static SDValue LowerBuildVectorv8i16(SDValue Op, unsigned NonZeros,
6144 unsigned NumNonZero, unsigned NumZero,
6145 SelectionDAG &DAG,
6146 const X86Subtarget &Subtarget) {
6147 if (NumNonZero > 4 && !Subtarget.hasSSE41())
6148 return SDValue();
6149
6150 SDLoc dl(Op);
6151 SDValue V;
6152 bool First = true;
6153 for (unsigned i = 0; i < 8; ++i) {
6154 bool IsNonZero = (NonZeros & (1 << i)) != 0;
6155 if (IsNonZero) {
6156 // If the build vector contains zeros or our first insertion is not the
6157 // first index then insert into zero vector to break any register
6158 // dependency else use SCALAR_TO_VECTOR/VZEXT_MOVL.
6159 if (First) {
6160 First = false;
6161 if (NumZero || 0 != i)
6162 V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl);
6163 else {
6164 assert(0 == i && "Expected insertion into zero-index")((0 == i && "Expected insertion into zero-index") ? static_cast
<void> (0) : __assert_fail ("0 == i && \"Expected insertion into zero-index\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6164, __PRETTY_FUNCTION__))
;
6165 V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32);
6166 V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V);
6167 V = DAG.getNode(X86ISD::VZEXT_MOVL, dl, MVT::v4i32, V);
6168 V = DAG.getBitcast(MVT::v8i16, V);
6169 continue;
6170 }
6171 }
6172 V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V,
6173 Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
6174 }
6175 }
6176
6177 return V;
6178}
6179
6180/// Custom lower build_vector of v4i32 or v4f32.
6181static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG,
6182 const X86Subtarget &Subtarget) {
6183 // Find all zeroable elements.
6184 std::bitset<4> Zeroable;
6185 for (int i=0; i < 4; ++i) {
6186 SDValue Elt = Op->getOperand(i);
6187 Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt));
6188 }
6189 assert(Zeroable.size() - Zeroable.count() > 1 &&((Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6190, __PRETTY_FUNCTION__))
6190 "We expect at least two non-zero elements!")((Zeroable.size() - Zeroable.count() > 1 && "We expect at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("Zeroable.size() - Zeroable.count() > 1 && \"We expect at least two non-zero elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6190, __PRETTY_FUNCTION__))
;
6191
6192 // We only know how to deal with build_vector nodes where elements are either
6193 // zeroable or extract_vector_elt with constant index.
6194 SDValue FirstNonZero;
6195 unsigned FirstNonZeroIdx;
6196 for (unsigned i=0; i < 4; ++i) {
6197 if (Zeroable[i])
6198 continue;
6199 SDValue Elt = Op->getOperand(i);
6200 if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6201 !isa<ConstantSDNode>(Elt.getOperand(1)))
6202 return SDValue();
6203 // Make sure that this node is extracting from a 128-bit vector.
6204 MVT VT = Elt.getOperand(0).getSimpleValueType();
6205 if (!VT.is128BitVector())
6206 return SDValue();
6207 if (!FirstNonZero.getNode()) {
6208 FirstNonZero = Elt;
6209 FirstNonZeroIdx = i;
6210 }
6211 }
6212
6213 assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!")((FirstNonZero.getNode() && "Unexpected build vector of all zeros!"
) ? static_cast<void> (0) : __assert_fail ("FirstNonZero.getNode() && \"Unexpected build vector of all zeros!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6213, __PRETTY_FUNCTION__))
;
6214 SDValue V1 = FirstNonZero.getOperand(0);
6215 MVT VT = V1.getSimpleValueType();
6216
6217 // See if this build_vector can be lowered as a blend with zero.
6218 SDValue Elt;
6219 unsigned EltMaskIdx, EltIdx;
6220 int Mask[4];
6221 for (EltIdx = 0; EltIdx < 4; ++EltIdx) {
6222 if (Zeroable[EltIdx]) {
6223 // The zero vector will be on the right hand side.
6224 Mask[EltIdx] = EltIdx+4;
6225 continue;
6226 }
6227
6228 Elt = Op->getOperand(EltIdx);
6229 // By construction, Elt is a EXTRACT_VECTOR_ELT with constant index.
6230 EltMaskIdx = Elt.getConstantOperandVal(1);
6231 if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx)
6232 break;
6233 Mask[EltIdx] = EltIdx;
6234 }
6235
6236 if (EltIdx == 4) {
6237 // Let the shuffle legalizer deal with blend operations.
6238 SDValue VZero = getZeroVector(VT, Subtarget, DAG, SDLoc(Op));
6239 if (V1.getSimpleValueType() != VT)
6240 V1 = DAG.getBitcast(VT, V1);
6241 return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZero, Mask);
6242 }
6243
6244 // See if we can lower this build_vector to a INSERTPS.
6245 if (!Subtarget.hasSSE41())
6246 return SDValue();
6247
6248 SDValue V2 = Elt.getOperand(0);
6249 if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx)
6250 V1 = SDValue();
6251
6252 bool CanFold = true;
6253 for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) {
6254 if (Zeroable[i])
6255 continue;
6256
6257 SDValue Current = Op->getOperand(i);
6258 SDValue SrcVector = Current->getOperand(0);
6259 if (!V1.getNode())
6260 V1 = SrcVector;
6261 CanFold = (SrcVector == V1) && (Current.getConstantOperandVal(1) == i);
6262 }
6263
6264 if (!CanFold)
6265 return SDValue();
6266
6267 assert(V1.getNode() && "Expected at least two non-zero elements!")((V1.getNode() && "Expected at least two non-zero elements!"
) ? static_cast<void> (0) : __assert_fail ("V1.getNode() && \"Expected at least two non-zero elements!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6267, __PRETTY_FUNCTION__))
;
6268 if (V1.getSimpleValueType() != MVT::v4f32)
6269 V1 = DAG.getBitcast(MVT::v4f32, V1);
6270 if (V2.getSimpleValueType() != MVT::v4f32)
6271 V2 = DAG.getBitcast(MVT::v4f32, V2);
6272
6273 // Ok, we can emit an INSERTPS instruction.
6274 unsigned ZMask = Zeroable.to_ulong();
6275
6276 unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask;
6277 assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!")(((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"
) ? static_cast<void> (0) : __assert_fail ("(InsertPSMask & ~0xFFu) == 0 && \"Invalid mask!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6277, __PRETTY_FUNCTION__))
;
6278 SDLoc DL(Op);
6279 SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
6280 DAG.getIntPtrConstant(InsertPSMask, DL));
6281 return DAG.getBitcast(VT, Result);
6282}
6283
6284/// Return a vector logical shift node.
6285static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits,
6286 SelectionDAG &DAG, const TargetLowering &TLI,
6287 const SDLoc &dl) {
6288 assert(VT.is128BitVector() && "Unknown type for VShift")((VT.is128BitVector() && "Unknown type for VShift") ?
static_cast<void> (0) : __assert_fail ("VT.is128BitVector() && \"Unknown type for VShift\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6288, __PRETTY_FUNCTION__))
;
6289 MVT ShVT = MVT::v16i8;
6290 unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ;
6291 SrcOp = DAG.getBitcast(ShVT, SrcOp);
6292 MVT ScalarShiftTy = TLI.getScalarShiftAmountTy(DAG.getDataLayout(), VT);
6293 assert(NumBits % 8 == 0 && "Only support byte sized shifts")((NumBits % 8 == 0 && "Only support byte sized shifts"
) ? static_cast<void> (0) : __assert_fail ("NumBits % 8 == 0 && \"Only support byte sized shifts\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6293, __PRETTY_FUNCTION__))
;
6294 SDValue ShiftVal = DAG.getConstant(NumBits/8, dl, ScalarShiftTy);
6295 return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal));
6296}
6297
6298static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
6299 SelectionDAG &DAG) {
6300
6301 // Check if the scalar load can be widened into a vector load. And if
6302 // the address is "base + cst" see if the cst can be "absorbed" into
6303 // the shuffle mask.
6304 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
6305 SDValue Ptr = LD->getBasePtr();
6306 if (!ISD::isNormalLoad(LD) || LD->isVolatile())
6307 return SDValue();
6308 EVT PVT = LD->getValueType(0);
6309 if (PVT != MVT::i32 && PVT != MVT::f32)
6310 return SDValue();
6311
6312 int FI = -1;
6313 int64_t Offset = 0;
6314 if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
6315 FI = FINode->getIndex();
6316 Offset = 0;
6317 } else if (DAG.isBaseWithConstantOffset(Ptr) &&
6318 isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
6319 FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
6320 Offset = Ptr.getConstantOperandVal(1);
6321 Ptr = Ptr.getOperand(0);
6322 } else {
6323 return SDValue();
6324 }
6325
6326 // FIXME: 256-bit vector instructions don't require a strict alignment,
6327 // improve this code to support it better.
6328 unsigned RequiredAlign = VT.getSizeInBits()/8;
6329 SDValue Chain = LD->getChain();
6330 // Make sure the stack object alignment is at least 16 or 32.
6331 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
6332 if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
6333 if (MFI.isFixedObjectIndex(FI)) {
6334 // Can't change the alignment. FIXME: It's possible to compute
6335 // the exact stack offset and reference FI + adjust offset instead.
6336 // If someone *really* cares about this. That's the way to implement it.
6337 return SDValue();
6338 } else {
6339 MFI.setObjectAlignment(FI, RequiredAlign);
6340 }
6341 }
6342
6343 // (Offset % 16 or 32) must be multiple of 4. Then address is then
6344 // Ptr + (Offset & ~15).
6345 if (Offset < 0)
6346 return SDValue();
6347 if ((Offset % RequiredAlign) & 3)
6348 return SDValue();
6349 int64_t StartOffset = Offset & ~int64_t(RequiredAlign - 1);
6350 if (StartOffset) {
6351 SDLoc DL(Ptr);
6352 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
6353 DAG.getConstant(StartOffset, DL, Ptr.getValueType()));
6354 }
6355
6356 int EltNo = (Offset - StartOffset) >> 2;
6357 unsigned NumElems = VT.getVectorNumElements();
6358
6359 EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems);
6360 SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr,
6361 LD->getPointerInfo().getWithOffset(StartOffset));
6362
6363 SmallVector<int, 8> Mask(NumElems, EltNo);
6364
6365 return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask);
6366 }
6367
6368 return SDValue();
6369}
6370
6371/// Given the initializing elements 'Elts' of a vector of type 'VT', see if the
6372/// elements can be replaced by a single large load which has the same value as
6373/// a build_vector or insert_subvector whose loaded operands are 'Elts'.
6374///
6375/// Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a
6376static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
6377 const SDLoc &DL, SelectionDAG &DAG,
6378 bool isAfterLegalize) {
6379 unsigned NumElems = Elts.size();
6380
6381 int LastLoadedElt = -1;
6382 SmallBitVector LoadMask(NumElems, false);
6383 SmallBitVector ZeroMask(NumElems, false);
6384 SmallBitVector UndefMask(NumElems, false);
6385
6386 // For each element in the initializer, see if we've found a load, zero or an
6387 // undef.
6388 for (unsigned i = 0; i < NumElems; ++i) {
6389 SDValue Elt = peekThroughBitcasts(Elts[i]);
6390 if (!Elt.getNode())
6391 return SDValue();
6392
6393 if (Elt.isUndef())
6394 UndefMask[i] = true;
6395 else if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode()))
6396 ZeroMask[i] = true;
6397 else if (ISD::isNON_EXTLoad(Elt.getNode())) {
6398 LoadMask[i] = true;
6399 LastLoadedElt = i;
6400 // Each loaded element must be the correct fractional portion of the
6401 // requested vector load.
6402 if ((NumElems * Elt.getValueSizeInBits()) != VT.getSizeInBits())
6403 return SDValue();
6404 } else
6405 return SDValue();
6406 }
6407 assert((ZeroMask | UndefMask | LoadMask).count() == NumElems &&(((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks") ? static_cast<void> (0) : __assert_fail
("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6408, __PRETTY_FUNCTION__))
6408 "Incomplete element masks")(((ZeroMask | UndefMask | LoadMask).count() == NumElems &&
"Incomplete element masks") ? static_cast<void> (0) : __assert_fail
("(ZeroMask | UndefMask | LoadMask).count() == NumElems && \"Incomplete element masks\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6408, __PRETTY_FUNCTION__))
;
6409
6410 // Handle Special Cases - all undef or undef/zero.
6411 if (UndefMask.count() == NumElems)
6412 return DAG.getUNDEF(VT);
6413
6414 // FIXME: Should we return this as a BUILD_VECTOR instead?
6415 if ((ZeroMask | UndefMask).count() == NumElems)
6416 return VT.isInteger() ? DAG.getConstant(0, DL, VT)
6417 : DAG.getConstantFP(0.0, DL, VT);
6418
6419 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6420 int FirstLoadedElt = LoadMask.find_first();
6421 SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]);
6422 LoadSDNode *LDBase = cast<LoadSDNode>(EltBase);
6423 EVT LDBaseVT = EltBase.getValueType();
6424
6425 // Consecutive loads can contain UNDEFS but not ZERO elements.
6426 // Consecutive loads with UNDEFs and ZEROs elements require a
6427 // an additional shuffle stage to clear the ZERO elements.
6428 bool IsConsecutiveLoad = true;
6429 bool IsConsecutiveLoadWithZeros = true;
6430 for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) {
6431 if (LoadMask[i]) {
6432 SDValue Elt = peekThroughBitcasts(Elts[i]);
6433 LoadSDNode *LD = cast<LoadSDNode>(Elt);
6434 if (!DAG.areNonVolatileConsecutiveLoads(
6435 LD, LDBase, Elt.getValueType().getStoreSizeInBits() / 8,
6436 i - FirstLoadedElt)) {
6437 IsConsecutiveLoad = false;
6438 IsConsecutiveLoadWithZeros = false;
6439 break;
6440 }
6441 } else if (ZeroMask[i]) {
6442 IsConsecutiveLoad = false;
6443 }
6444 }
6445
6446 auto CreateLoad = [&DAG, &DL](EVT VT, LoadSDNode *LDBase) {
6447 auto MMOFlags = LDBase->getMemOperand()->getFlags();
6448 assert(!(MMOFlags & MachineMemOperand::MOVolatile) &&((!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."
) ? static_cast<void> (0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6449, __PRETTY_FUNCTION__))
6449 "Cannot merge volatile loads.")((!(MMOFlags & MachineMemOperand::MOVolatile) && "Cannot merge volatile loads."
) ? static_cast<void> (0) : __assert_fail ("!(MMOFlags & MachineMemOperand::MOVolatile) && \"Cannot merge volatile loads.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6449, __PRETTY_FUNCTION__))
;
6450 SDValue NewLd =
6451 DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(),
6452 LDBase->getPointerInfo(), LDBase->getAlignment(), MMOFlags);
6453
6454 if (LDBase->hasAnyUseOfValue(1)) {
6455 SDValue NewChain =
6456 DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
6457 SDValue(NewLd.getNode(), 1));
6458 DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
6459 DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
6460 SDValue(NewLd.getNode(), 1));
6461 }
6462
6463 return NewLd;
6464 };
6465
6466 // LOAD - all consecutive load/undefs (must start/end with a load).
6467 // If we have found an entire vector of loads and undefs, then return a large
6468 // load of the entire vector width starting at the base pointer.
6469 // If the vector contains zeros, then attempt to shuffle those elements.
6470 if (FirstLoadedElt == 0 && LastLoadedElt == (int)(NumElems - 1) &&
6471 (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) {
6472 assert(LDBase && "Did not find base load for merging consecutive loads")((LDBase && "Did not find base load for merging consecutive loads"
) ? static_cast<void> (0) : __assert_fail ("LDBase && \"Did not find base load for merging consecutive loads\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6472, __PRETTY_FUNCTION__))
;
6473 EVT EltVT = LDBase->getValueType(0);
6474 // Ensure that the input vector size for the merged loads matches the
6475 // cumulative size of the input elements.
6476 if (VT.getSizeInBits() != EltVT.getSizeInBits() * NumElems)
6477 return SDValue();
6478
6479 if (isAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT))
6480 return SDValue();
6481
6482 if (IsConsecutiveLoad)
6483 return CreateLoad(VT, LDBase);
6484
6485 // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded
6486 // vector and a zero vector to clear out the zero elements.
6487 if (!isAfterLegalize && NumElems == VT.getVectorNumElements()) {
6488 SmallVector<int, 4> ClearMask(NumElems, -1);
6489 for (unsigned i = 0; i < NumElems; ++i) {
6490 if (ZeroMask[i])
6491 ClearMask[i] = i + NumElems;
6492 else if (LoadMask[i])
6493 ClearMask[i] = i;
6494 }
6495 SDValue V = CreateLoad(VT, LDBase);
6496 SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT)
6497 : DAG.getConstantFP(0.0, DL, VT);
6498 return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask);
6499 }
6500 }
6501
6502 int LoadSize =
6503 (1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
6504
6505 // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
6506 if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
6507 (LoadSize == 32 || LoadSize == 64) &&
6508 ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
6509 MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
6510 : MVT::getIntegerVT(LoadSize);
6511 MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
6512 if (TLI.isTypeLegal(VecVT)) {
6513 SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
6514 SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
6515 SDValue ResNode =
6516 DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT,
6517 LDBase->getPointerInfo(),
6518 LDBase->getAlignment(),
6519 false/*isVolatile*/, true/*ReadMem*/,
6520 false/*WriteMem*/);
6521
6522 // Make sure the newly-created LOAD is in the same position as LDBase in
6523 // terms of dependency. We create a TokenFactor for LDBase and ResNode,
6524 // and update uses of LDBase's output chain to use the TokenFactor.
6525 if (LDBase->hasAnyUseOfValue(1)) {
6526 SDValue NewChain =
6527 DAG.getNode(ISD::TokenFactor, DL, MVT::Other, SDValue(LDBase, 1),
6528 SDValue(ResNode.getNode(), 1));
6529 DAG.ReplaceAllUsesOfValueWith(SDValue(LDBase, 1), NewChain);
6530 DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(LDBase, 1),
6531 SDValue(ResNode.getNode(), 1));
6532 }
6533
6534 return DAG.getBitcast(VT, ResNode);
6535 }
6536 }
6537
6538 return SDValue();
6539}
6540
6541static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
6542 unsigned SplatBitSize, LLVMContext &C) {
6543 unsigned ScalarSize = VT.getScalarSizeInBits();
6544 unsigned NumElm = SplatBitSize / ScalarSize;
6545
6546 SmallVector<Constant *, 32> ConstantVec;
6547 for (unsigned i = 0; i < NumElm; i++) {
6548 APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
6549 Constant *Const;
6550 if (VT.isFloatingPoint()) {
6551 assert((ScalarSize == 32 || ScalarSize == 64) &&(((ScalarSize == 32 || ScalarSize == 64) && "Unsupported floating point scalar size"
) ? static_cast<void> (0) : __assert_fail ("(ScalarSize == 32 || ScalarSize == 64) && \"Unsupported floating point scalar size\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6552, __PRETTY_FUNCTION__))
6552 "Unsupported floating point scalar size")(((ScalarSize == 32 || ScalarSize == 64) && "Unsupported floating point scalar size"
) ? static_cast<void> (0) : __assert_fail ("(ScalarSize == 32 || ScalarSize == 64) && \"Unsupported floating point scalar size\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6552, __PRETTY_FUNCTION__))
;
6553 if (ScalarSize == 32)
6554 Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
6555 else
6556 Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
6557 } else
6558 Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
6559 ConstantVec.push_back(Const);
6560 }
6561 return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
6562}
6563
6564static bool isUseOfShuffle(SDNode *N) {
6565 for (auto *U : N->uses()) {
6566 if (isTargetShuffle(U->getOpcode()))
6567 return true;
6568 if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
6569 return isUseOfShuffle(U);
6570 }
6571 return false;
6572}
6573
6574/// Attempt to use the vbroadcast instruction to generate a splat value
6575/// from a splat BUILD_VECTOR which uses:
6576/// a. A single scalar load, or a constant.
6577/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
6578///
6579/// The VBROADCAST node is returned when a pattern is found,
6580/// or SDValue() otherwise.
6581static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
6582 const X86Subtarget &Subtarget,
6583 SelectionDAG &DAG) {
6584 // VBROADCAST requires AVX.
6585 // TODO: Splats could be generated for non-AVX CPUs using SSE
6586 // instructions, but there's less potential gain for only 128-bit vectors.
6587 if (!Subtarget.hasAVX())
6588 return SDValue();
6589
6590 MVT VT = BVOp->getSimpleValueType(0);
6591 SDLoc dl(BVOp);
6592
6593 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Unsupported vector type for broadcast.") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6594, __PRETTY_FUNCTION__))
6594 "Unsupported vector type for broadcast.")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Unsupported vector type for broadcast.") ? static_cast
<void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Unsupported vector type for broadcast.\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6594, __PRETTY_FUNCTION__))
;
6595
6596 BitVector UndefElements;
6597 SDValue Ld = BVOp->getSplatValue(&UndefElements);
6598
6599 // We need a splat of a single value to use broadcast, and it doesn't
6600 // make any sense if the value is only in one element of the vector.
6601 if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
6602 APInt SplatValue, Undef;
6603 unsigned SplatBitSize;
6604 bool HasUndef;
6605 // Check if this is a repeated constant pattern suitable for broadcasting.
6606 if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
6607 SplatBitSize > VT.getScalarSizeInBits() &&
6608 SplatBitSize < VT.getSizeInBits()) {
6609 // Avoid replacing with broadcast when it's a use of a shuffle
6610 // instruction to preserve the present custom lowering of shuffles.
6611 if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
6612 return SDValue();
6613 // replace BUILD_VECTOR with broadcast of the repeated constants.
6614 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6615 LLVMContext *Ctx = DAG.getContext();
6616 MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
6617 if (Subtarget.hasAVX()) {
6618 if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
6619 !(SplatBitSize == 64 && Subtarget.is32Bit())) {
6620 // Splatted value can fit in one INTEGER constant in constant pool.
6621 // Load the constant and broadcast it.
6622 MVT CVT = MVT::getIntegerVT(SplatBitSize);
6623 Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
6624 Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
6625 SDValue CP = DAG.getConstantPool(C, PVT);
6626 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6627
6628 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6629 Ld = DAG.getLoad(
6630 CVT, dl, DAG.getEntryNode(), CP,
6631 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6632 Alignment);
6633 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6634 MVT::getVectorVT(CVT, Repeat), Ld);
6635 return DAG.getBitcast(VT, Brdcst);
6636 } else if (SplatBitSize == 32 || SplatBitSize == 64) {
6637 // Splatted value can fit in one FLOAT constant in constant pool.
6638 // Load the constant and broadcast it.
6639 // AVX have support for 32 and 64 bit broadcast for floats only.
6640 // No 64bit integer in 32bit subtarget.
6641 MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
6642 Constant *C = SplatBitSize == 32
6643 ? ConstantFP::get(Type::getFloatTy(*Ctx),
6644 SplatValue.bitsToFloat())
6645 : ConstantFP::get(Type::getDoubleTy(*Ctx),
6646 SplatValue.bitsToDouble());
6647 SDValue CP = DAG.getConstantPool(C, PVT);
6648 unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
6649
6650 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6651 Ld = DAG.getLoad(
6652 CVT, dl, DAG.getEntryNode(), CP,
6653 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6654 Alignment);
6655 SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
6656 MVT::getVectorVT(CVT, Repeat), Ld);
6657 return DAG.getBitcast(VT, Brdcst);
6658 } else if (SplatBitSize > 64) {
6659 // Load the vector of constants and broadcast it.
6660 MVT CVT = VT.getScalarType();
6661 Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
6662 *Ctx);
6663 SDValue VCP = DAG.getConstantPool(VecC, PVT);
6664 unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
6665 unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
6666 Ld = DAG.getLoad(
6667 MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
6668 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6669 Alignment);
6670 SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
6671 return DAG.getBitcast(VT, Brdcst);
6672 }
6673 }
6674 }
6675 return SDValue();
6676 }
6677
6678 bool ConstSplatVal =
6679 (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
6680
6681 // Make sure that all of the users of a non-constant load are from the
6682 // BUILD_VECTOR node.
6683 if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
6684 return SDValue();
6685
6686 unsigned ScalarSize = Ld.getValueSizeInBits();
6687 bool IsGE256 = (VT.getSizeInBits() >= 256);
6688
6689 // When optimizing for size, generate up to 5 extra bytes for a broadcast
6690 // instruction to save 8 or more bytes of constant pool data.
6691 // TODO: If multiple splats are generated to load the same constant,
6692 // it may be detrimental to overall size. There needs to be a way to detect
6693 // that condition to know if this is truly a size win.
6694 bool OptForSize = DAG.getMachineFunction().getFunction()->optForSize();
6695
6696 // Handle broadcasting a single constant scalar from the constant pool
6697 // into a vector.
6698 // On Sandybridge (no AVX2), it is still better to load a constant vector
6699 // from the constant pool and not to broadcast it from a scalar.
6700 // But override that restriction when optimizing for size.
6701 // TODO: Check if splatting is recommended for other AVX-capable CPUs.
6702 if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) {
6703 EVT CVT = Ld.getValueType();
6704 assert(!CVT.isVector() && "Must not broadcast a vector type")((!CVT.isVector() && "Must not broadcast a vector type"
) ? static_cast<void> (0) : __assert_fail ("!CVT.isVector() && \"Must not broadcast a vector type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6704, __PRETTY_FUNCTION__))
;
6705
6706 // Splat f32, i32, v4f64, v4i64 in all cases with AVX2.
6707 // For size optimization, also splat v2f64 and v2i64, and for size opt
6708 // with AVX2, also splat i8 and i16.
6709 // With pattern matching, the VBROADCAST node may become a VMOVDDUP.
6710 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6711 (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) {
6712 const Constant *C = nullptr;
6713 if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld))
6714 C = CI->getConstantIntValue();
6715 else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld))
6716 C = CF->getConstantFPValue();
6717
6718 assert(C && "Invalid constant type")((C && "Invalid constant type") ? static_cast<void
> (0) : __assert_fail ("C && \"Invalid constant type\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6718, __PRETTY_FUNCTION__))
;
6719
6720 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6721 SDValue CP =
6722 DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
6723 unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
6724 Ld = DAG.getLoad(
6725 CVT, dl, DAG.getEntryNode(), CP,
6726 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
6727 Alignment);
6728
6729 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6730 }
6731 }
6732
6733 bool IsLoad = ISD::isNormalLoad(Ld.getNode());
6734
6735 // Handle AVX2 in-register broadcasts.
6736 if (!IsLoad && Subtarget.hasInt256() &&
6737 (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)))
6738 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6739
6740 // The scalar source must be a normal load.
6741 if (!IsLoad)
6742 return SDValue();
6743
6744 if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) ||
6745 (Subtarget.hasVLX() && ScalarSize == 64))
6746 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6747
6748 // The integer check is needed for the 64-bit into 128-bit so it doesn't match
6749 // double since there is no vbroadcastsd xmm
6750 if (Subtarget.hasInt256() && Ld.getValueType().isInteger()) {
6751 if (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)
6752 return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld);
6753 }
6754
6755 // Unsupported broadcast.
6756 return SDValue();
6757}
6758
6759/// \brief For an EXTRACT_VECTOR_ELT with a constant index return the real
6760/// underlying vector and index.
6761///
6762/// Modifies \p ExtractedFromVec to the real vector and returns the real
6763/// index.
6764static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec,
6765 SDValue ExtIdx) {
6766 int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue();
6767 if (!isa<ShuffleVectorSDNode>(ExtractedFromVec))
6768 return Idx;
6769
6770 // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already
6771 // lowered this:
6772 // (extract_vector_elt (v8f32 %vreg1), Constant<6>)
6773 // to:
6774 // (extract_vector_elt (vector_shuffle<2,u,u,u>
6775 // (extract_subvector (v8f32 %vreg0), Constant<4>),
6776 // undef)
6777 // Constant<0>)
6778 // In this case the vector is the extract_subvector expression and the index
6779 // is 2, as specified by the shuffle.
6780 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec);
6781 SDValue ShuffleVec = SVOp->getOperand(0);
6782 MVT ShuffleVecVT = ShuffleVec.getSimpleValueType();
6783 assert(ShuffleVecVT.getVectorElementType() ==((ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType
().getVectorElementType()) ? static_cast<void> (0) : __assert_fail
("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6784, __PRETTY_FUNCTION__))
6784 ExtractedFromVec.getSimpleValueType().getVectorElementType())((ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType
().getVectorElementType()) ? static_cast<void> (0) : __assert_fail
("ShuffleVecVT.getVectorElementType() == ExtractedFromVec.getSimpleValueType().getVectorElementType()"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6784, __PRETTY_FUNCTION__))
;
6785
6786 int ShuffleIdx = SVOp->getMaskElt(Idx);
6787 if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) {
6788 ExtractedFromVec = ShuffleVec;
6789 return ShuffleIdx;
6790 }
6791 return Idx;
6792}
6793
6794static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
6795 MVT VT = Op.getSimpleValueType();
6796
6797 // Skip if insert_vec_elt is not supported.
6798 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6799 if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
6800 return SDValue();
6801
6802 SDLoc DL(Op);
6803 unsigned NumElems = Op.getNumOperands();
6804
6805 SDValue VecIn1;
6806 SDValue VecIn2;
6807 SmallVector<unsigned, 4> InsertIndices;
6808 SmallVector<int, 8> Mask(NumElems, -1);
6809
6810 for (unsigned i = 0; i != NumElems; ++i) {
6811 unsigned Opc = Op.getOperand(i).getOpcode();
6812
6813 if (Opc == ISD::UNDEF)
6814 continue;
6815
6816 if (Opc != ISD::EXTRACT_VECTOR_ELT) {
6817 // Quit if more than 1 elements need inserting.
6818 if (InsertIndices.size() > 1)
6819 return SDValue();
6820
6821 InsertIndices.push_back(i);
6822 continue;
6823 }
6824
6825 SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
6826 SDValue ExtIdx = Op.getOperand(i).getOperand(1);
6827
6828 // Quit if non-constant index.
6829 if (!isa<ConstantSDNode>(ExtIdx))
6830 return SDValue();
6831 int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);
6832
6833 // Quit if extracted from vector of different type.
6834 if (ExtractedFromVec.getValueType() != VT)
6835 return SDValue();
6836
6837 if (!VecIn1.getNode())
6838 VecIn1 = ExtractedFromVec;
6839 else if (VecIn1 != ExtractedFromVec) {
6840 if (!VecIn2.getNode())
6841 VecIn2 = ExtractedFromVec;
6842 else if (VecIn2 != ExtractedFromVec)
6843 // Quit if more than 2 vectors to shuffle
6844 return SDValue();
6845 }
6846
6847 if (ExtractedFromVec == VecIn1)
6848 Mask[i] = Idx;
6849 else if (ExtractedFromVec == VecIn2)
6850 Mask[i] = Idx + NumElems;
6851 }
6852
6853 if (!VecIn1.getNode())
6854 return SDValue();
6855
6856 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
6857 SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);
6858
6859 for (unsigned Idx : InsertIndices)
6860 NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
6861 DAG.getIntPtrConstant(Idx, DL));
6862
6863 return NV;
6864}
6865
6866static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
6867 assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6869, __PRETTY_FUNCTION__))
6868 Op.getScalarValueSizeInBits() == 1 &&((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6869, __PRETTY_FUNCTION__))
6869 "Can not convert non-constant vector")((ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
Op.getScalarValueSizeInBits() == 1 && "Can not convert non-constant vector"
) ? static_cast<void> (0) : __assert_fail ("ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && Op.getScalarValueSizeInBits() == 1 && \"Can not convert non-constant vector\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6869, __PRETTY_FUNCTION__))
;
6870 uint64_t Immediate = 0;
6871 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
6872 SDValue In = Op.getOperand(idx);
6873 if (!In.isUndef())
6874 Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
6875 }
6876 SDLoc dl(Op);
6877 MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
6878 return DAG.getConstant(Immediate, dl, VT);
6879}
6880// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
6881SDValue
6882X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const {
6883
6884 MVT VT = Op.getSimpleValueType();
6885 assert((VT.getVectorElementType() == MVT::i1) &&(((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"
) ? static_cast<void> (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6886, __PRETTY_FUNCTION__))
6886 "Unexpected type in LowerBUILD_VECTORvXi1!")(((VT.getVectorElementType() == MVT::i1) && "Unexpected type in LowerBUILD_VECTORvXi1!"
) ? static_cast<void> (0) : __assert_fail ("(VT.getVectorElementType() == MVT::i1) && \"Unexpected type in LowerBUILD_VECTORvXi1!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6886, __PRETTY_FUNCTION__))
;
6887
6888 SDLoc dl(Op);
6889 if (ISD::isBuildVectorAllZeros(Op.getNode()))
6890 return DAG.getTargetConstant(0, dl, VT);
6891
6892 if (ISD::isBuildVectorAllOnes(Op.getNode()))
6893 return DAG.getTargetConstant(1, dl, VT);
6894
6895 if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
6896 SDValue Imm = ConvertI1VectorToInteger(Op, DAG);
6897 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
6898 return DAG.getBitcast(VT, Imm);
6899 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
6900 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
6901 DAG.getIntPtrConstant(0, dl));
6902 }
6903
6904 // Vector has one or more non-const elements
6905 uint64_t Immediate = 0;
6906 SmallVector<unsigned, 16> NonConstIdx;
6907 bool IsSplat = true;
6908 bool HasConstElts = false;
6909 int SplatIdx = -1;
6910 for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
6911 SDValue In = Op.getOperand(idx);
6912 if (In.isUndef())
6913 continue;
6914 if (!isa<ConstantSDNode>(In))
6915 NonConstIdx.push_back(idx);
6916 else {
6917 Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
6918 HasConstElts = true;
6919 }
6920 if (SplatIdx < 0)
6921 SplatIdx = idx;
6922 else if (In != Op.getOperand(SplatIdx))
6923 IsSplat = false;
6924 }
6925
6926 // for splat use " (select i1 splat_elt, all-ones, all-zeroes)"
6927 if (IsSplat)
6928 return DAG.getSelect(dl, VT, Op.getOperand(SplatIdx),
6929 DAG.getConstant(1, dl, VT),
6930 DAG.getConstant(0, dl, VT));
6931
6932 // insert elements one by one
6933 SDValue DstVec;
6934 SDValue Imm;
6935 if (Immediate) {
6936 MVT ImmVT = MVT::getIntegerVT(std::max((int)VT.getSizeInBits(), 8));
6937 Imm = DAG.getConstant(Immediate, dl, ImmVT);
6938 }
6939 else if (HasConstElts)
6940 Imm = DAG.getConstant(0, dl, VT);
6941 else
6942 Imm = DAG.getUNDEF(VT);
6943 if (Imm.getValueSizeInBits() == VT.getSizeInBits())
6944 DstVec = DAG.getBitcast(VT, Imm);
6945 else {
6946 SDValue ExtVec = DAG.getBitcast(MVT::v8i1, Imm);
6947 DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, ExtVec,
6948 DAG.getIntPtrConstant(0, dl));
6949 }
6950
6951 for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
6952 unsigned InsertIdx = NonConstIdx[i];
6953 DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
6954 Op.getOperand(InsertIdx),
6955 DAG.getIntPtrConstant(InsertIdx, dl));
6956 }
6957 return DstVec;
6958}
6959
6960/// \brief Return true if \p N implements a horizontal binop and return the
6961/// operands for the horizontal binop into V0 and V1.
6962///
6963/// This is a helper function of LowerToHorizontalOp().
6964/// This function checks that the build_vector \p N in input implements a
6965/// horizontal operation. Parameter \p Opcode defines the kind of horizontal
6966/// operation to match.
6967/// For example, if \p Opcode is equal to ISD::ADD, then this function
6968/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode
6969/// is equal to ISD::SUB, then this function checks if this is a horizontal
6970/// arithmetic sub.
6971///
6972/// This function only analyzes elements of \p N whose indices are
6973/// in range [BaseIdx, LastIdx).
6974static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode,
6975 SelectionDAG &DAG,
6976 unsigned BaseIdx, unsigned LastIdx,
6977 SDValue &V0, SDValue &V1) {
6978 EVT VT = N->getValueType(0);
6979
6980 assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!")((BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"
) ? static_cast<void> (0) : __assert_fail ("BaseIdx * 2 <= LastIdx && \"Invalid Indices in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6980, __PRETTY_FUNCTION__))
;
6981 assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&((VT.isVector() && VT.getVectorNumElements() >= LastIdx
&& "Invalid Vector in input!") ? static_cast<void
> (0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6982, __PRETTY_FUNCTION__))
6982 "Invalid Vector in input!")((VT.isVector() && VT.getVectorNumElements() >= LastIdx
&& "Invalid Vector in input!") ? static_cast<void
> (0) : __assert_fail ("VT.isVector() && VT.getVectorNumElements() >= LastIdx && \"Invalid Vector in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 6982, __PRETTY_FUNCTION__))
;
6983
6984 bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
6985 bool CanFold = true;
6986 unsigned ExpectedVExtractIdx = BaseIdx;
6987 unsigned NumElts = LastIdx - BaseIdx;
6988 V0 = DAG.getUNDEF(VT);
6989 V1 = DAG.getUNDEF(VT);
6990
6991 // Check if N implements a horizontal binop.
6992 for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
6993 SDValue Op = N->getOperand(i + BaseIdx);
6994
6995 // Skip UNDEFs.
6996 if (Op->isUndef()) {
6997 // Update the expected vector extract index.
6998 if (i * 2 == NumElts)
6999 ExpectedVExtractIdx = BaseIdx;
7000 ExpectedVExtractIdx += 2;
7001 continue;
7002 }
7003
7004 CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();
7005
7006 if (!CanFold)
7007 break;
7008
7009 SDValue Op0 = Op.getOperand(0);
7010 SDValue Op1 = Op.getOperand(1);
7011
7012 // Try to match the following pattern:
7013 // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1))
7014 CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7015 Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
7016 Op0.getOperand(0) == Op1.getOperand(0) &&
7017 isa<ConstantSDNode>(Op0.getOperand(1)) &&
7018 isa<ConstantSDNode>(Op1.getOperand(1)));
7019 if (!CanFold)
7020 break;
7021
7022 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7023 unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue();
7024
7025 if (i * 2 < NumElts) {
7026 if (V0.isUndef()) {
7027 V0 = Op0.getOperand(0);
7028 if (V0.getValueType() != VT)
7029 return false;
7030 }
7031 } else {
7032 if (V1.isUndef()) {
7033 V1 = Op0.getOperand(0);
7034 if (V1.getValueType() != VT)
7035 return false;
7036 }
7037 if (i * 2 == NumElts)
7038 ExpectedVExtractIdx = BaseIdx;
7039 }
7040
7041 SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
7042 if (I0 == ExpectedVExtractIdx)
7043 CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
7044 else if (IsCommutable && I1 == ExpectedVExtractIdx) {
7045 // Try to match the following dag sequence:
7046 // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I))
7047 CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
7048 } else
7049 CanFold = false;
7050
7051 ExpectedVExtractIdx += 2;
7052 }
7053
7054 return CanFold;
7055}
7056
7057/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by
7058/// a concat_vector.
7059///
7060/// This is a helper function of LowerToHorizontalOp().
7061/// This function expects two 256-bit vectors called V0 and V1.
7062/// At first, each vector is split into two separate 128-bit vectors.
7063/// Then, the resulting 128-bit vectors are used to implement two
7064/// horizontal binary operations.
7065///
7066/// The kind of horizontal binary operation is defined by \p X86Opcode.
7067///
7068/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed in input to
7069/// the two new horizontal binop.
7070/// When Mode is set, the first horizontal binop dag node would take as input
7071/// the lower 128-bit of V0 and the upper 128-bit of V0. The second
7072/// horizontal binop dag node would take as input the lower 128-bit of V1
7073/// and the upper 128-bit of V1.
7074/// Example:
7075/// HADD V0_LO, V0_HI
7076/// HADD V1_LO, V1_HI
7077///
7078/// Otherwise, the first horizontal binop dag node takes as input the lower
7079/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop
7080/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1.
7081/// Example:
7082/// HADD V0_LO, V1_LO
7083/// HADD V0_HI, V1_HI
7084///
7085/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower
7086/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to
7087/// the upper 128-bits of the result.
7088static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1,
7089 const SDLoc &DL, SelectionDAG &DAG,
7090 unsigned X86Opcode, bool Mode,
7091 bool isUndefLO, bool isUndefHI) {
7092 MVT VT = V0.getSimpleValueType();
7093 assert(VT.is256BitVector() && VT == V1.getSimpleValueType() &&((VT.is256BitVector() && VT == V1.getSimpleValueType(
) && "Invalid nodes in input!") ? static_cast<void
> (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7094, __PRETTY_FUNCTION__))
7094 "Invalid nodes in input!")((VT.is256BitVector() && VT == V1.getSimpleValueType(
) && "Invalid nodes in input!") ? static_cast<void
> (0) : __assert_fail ("VT.is256BitVector() && VT == V1.getSimpleValueType() && \"Invalid nodes in input!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7094, __PRETTY_FUNCTION__))
;
7095
7096 unsigned NumElts = VT.getVectorNumElements();
7097 SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL);
7098 SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL);
7099 SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL);
7100 SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL);
7101 MVT NewVT = V0_LO.getSimpleValueType();
7102
7103 SDValue LO = DAG.getUNDEF(NewVT);
7104 SDValue HI = DAG.getUNDEF(NewVT);
7105
7106 if (Mode) {
7107 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7108 if (!isUndefLO && !V0->isUndef())
7109 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI);
7110 if (!isUndefHI && !V1->isUndef())
7111 HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI);
7112 } else {
7113 // Don't emit a horizontal binop if the result is expected to be UNDEF.
7114 if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef()))
7115 LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO);
7116
7117 if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef()))
7118 HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI);
7119 }
7120
7121 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI);
7122}
7123
7124/// Returns true iff \p BV builds a vector with the result equivalent to
7125/// the result of ADDSUB operation.
7126/// If true is returned then the operands of ADDSUB = Opnd0 +- Opnd1 operation
7127/// are written to the parameters \p Opnd0 and \p Opnd1.
7128static bool isAddSub(const BuildVectorSDNode *BV,
7129 const X86Subtarget &Subtarget, SelectionDAG &DAG,
7130 SDValue &Opnd0, SDValue &Opnd1) {
7131
7132 MVT VT = BV->getSimpleValueType(0);
7133 if ((!Subtarget.hasSSE3() || (VT != MVT::v4f32 && VT != MVT::v2f64)) &&
7134 (!Subtarget.hasAVX() || (VT != MVT::v8f32 && VT != MVT::v4f64)) &&
7135 (!Subtarget.hasAVX512() || (VT != MVT::v16f32 && VT != MVT::v8f64)))
7136 return false;
7137
7138 unsigned NumElts = VT.getVectorNumElements();
7139 SDValue InVec0 = DAG.getUNDEF(VT);
7140 SDValue InVec1 = DAG.getUNDEF(VT);
7141
7142 // Odd-numbered elements in the input build vector are obtained from
7143 // adding two integer/float elements.
7144 // Even-numbered elements in the input build vector are obtained from
7145 // subtracting two integer/float elements.
7146 unsigned ExpectedOpcode = ISD::FSUB;
7147 unsigned NextExpectedOpcode = ISD::FADD;
7148 bool AddFound = false;
7149 bool SubFound = false;
7150
7151 for (unsigned i = 0, e = NumElts; i != e; ++i) {
7152 SDValue Op = BV->getOperand(i);
7153
7154 // Skip 'undef' values.
7155 unsigned Opcode = Op.getOpcode();
7156 if (Opcode == ISD::UNDEF) {
7157 std::swap(ExpectedOpcode, NextExpectedOpcode);
7158 continue;
7159 }
7160
7161 // Early exit if we found an unexpected opcode.
7162 if (Opcode != ExpectedOpcode)
7163 return false;
7164
7165 SDValue Op0 = Op.getOperand(0);
7166 SDValue Op1 = Op.getOperand(1);
7167
7168 // Try to match the following pattern:
7169 // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i))
7170 // Early exit if we cannot match that sequence.
7171 if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7172 Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
7173 !isa<ConstantSDNode>(Op0.getOperand(1)) ||
7174 !isa<ConstantSDNode>(Op1.getOperand(1)) ||
7175 Op0.getOperand(1) != Op1.getOperand(1))
7176 return false;
7177
7178 unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
7179 if (I0 != i)
7180 return false;
7181
7182 // We found a valid add/sub node. Update the information accordingly.
7183 if (i & 1)
7184 AddFound = true;
7185 else
7186 SubFound = true;
7187
7188 // Update InVec0 and InVec1.
7189 if (InVec0.isUndef()) {
7190 InVec0 = Op0.getOperand(0);
7191 if (InVec0.getSimpleValueType() != VT)
7192 return false;
7193 }
7194 if (InVec1.isUndef()) {
7195 InVec1 = Op1.getOperand(0);
7196 if (InVec1.getSimpleValueType() != VT)
7197 return false;
7198 }
7199
7200 // Make sure that operands in input to each add/sub node always
7201 // come from a same pair of vectors.
7202 if (InVec0 != Op0.getOperand(0)) {
7203 if (ExpectedOpcode == ISD::FSUB)
7204 return false;
7205
7206 // FADD is commutable. Try to commute the operands
7207 // and then test again.
7208 std::swap(Op0, Op1);
7209 if (InVec0 != Op0.getOperand(0))
7210 return false;
7211 }
7212
7213 if (InVec1 != Op1.getOperand(0))
7214 return false;
7215
7216 // Update the pair of expected opcodes.
7217 std::swap(ExpectedOpcode, NextExpectedOpcode);
7218 }
7219
7220 // Don't try to fold this build_vector into an ADDSUB if the inputs are undef.
7221 if (!AddFound || !SubFound || InVec0.isUndef() || InVec1.isUndef())
7222 return false;
7223
7224 Opnd0 = InVec0;
7225 Opnd1 = InVec1;
7226 return true;
7227}
7228
7229/// Returns true if is possible to fold MUL and an idiom that has already been
7230/// recognized as ADDSUB(\p Opnd0, \p Opnd1) into FMADDSUB(x, y, \p Opnd1).
7231/// If (and only if) true is returned, the operands of FMADDSUB are written to
7232/// parameters \p Opnd0, \p Opnd1, \p Opnd2.
7233///
7234/// Prior to calling this function it should be known that there is some
7235/// SDNode that potentially can be replaced with an X86ISD::ADDSUB operation
7236/// using \p Opnd0 and \p Opnd1 as operands. Also, this method is called
7237/// before replacement of such SDNode with ADDSUB operation. Thus the number
7238/// of \p Opnd0 uses is expected to be equal to 2.
7239/// For example, this function may be called for the following IR:
7240/// %AB = fmul fast <2 x double> %A, %B
7241/// %Sub = fsub fast <2 x double> %AB, %C
7242/// %Add = fadd fast <2 x double> %AB, %C
7243/// %Addsub = shufflevector <2 x double> %Sub, <2 x double> %Add,
7244/// <2 x i32> <i32 0, i32 3>
7245/// There is a def for %Addsub here, which potentially can be replaced by
7246/// X86ISD::ADDSUB operation:
7247/// %Addsub = X86ISD::ADDSUB %AB, %C
7248/// and such ADDSUB can further be replaced with FMADDSUB:
7249/// %Addsub = FMADDSUB %A, %B, %C.
7250///
7251/// The main reason why this method is called before the replacement of the
7252/// recognized ADDSUB idiom with ADDSUB operation is that such replacement
7253/// is illegal sometimes. E.g. 512-bit ADDSUB is not available, while 512-bit
7254/// FMADDSUB is.
7255static bool isFMAddSub(const X86Subtarget &Subtarget, SelectionDAG &DAG,
7256 SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2) {
7257 if (Opnd0.getOpcode() != ISD::FMUL || Opnd0->use_size() != 2 ||
7258 !Subtarget.hasAnyFMA())
7259 return false;
7260
7261 // FIXME: These checks must match the similar ones in
7262 // DAGCombiner::visitFADDForFMACombine. It would be good to have one
7263 // function that would answer if it is Ok to fuse MUL + ADD to FMADD
7264 // or MUL + ADDSUB to FMADDSUB.
7265 const TargetOptions &Options = DAG.getTarget().Options;
7266 bool AllowFusion =
7267 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
7268 if (!AllowFusion)
7269 return false;
7270
7271 Opnd2 = Opnd1;
7272 Opnd1 = Opnd0.getOperand(1);
7273 Opnd0 = Opnd0.getOperand(0);
7274
7275 return true;
7276}
7277
7278/// Try to fold a build_vector that performs an 'addsub' or 'fmaddsub' operation
7279/// accordingly to X86ISD::ADDSUB or X86ISD::FMADDSUB node.
7280static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV,
7281 const X86Subtarget &Subtarget,
7282 SelectionDAG &DAG) {
7283 SDValue Opnd0, Opnd1;
7284 if (!isAddSub(BV, Subtarget, DAG, Opnd0, Opnd1))
7285 return SDValue();
7286
7287 MVT VT = BV->getSimpleValueType(0);
7288 SDLoc DL(BV);
7289
7290 // Try to generate X86ISD::FMADDSUB node here.
7291 SDValue Opnd2;
7292 if (isFMAddSub(Subtarget, DAG, Opnd0, Opnd1, Opnd2))
7293 return DAG.getNode(X86ISD::FMADDSUB, DL, VT, Opnd0, Opnd1, Opnd2);
7294
7295 // Do not generate X86ISD::ADDSUB node for 512-bit types even though
7296 // the ADDSUB idiom has been successfully recognized. There are no known
7297 // X86 targets with 512-bit ADDSUB instructions!
7298 // 512-bit ADDSUB idiom recognition was needed only as part of FMADDSUB idiom
7299 // recognition.
7300 if (VT.is512BitVector())
7301 return SDValue();
7302
7303 return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1);
7304}
7305
7306/// Lower BUILD_VECTOR to a horizontal add/sub operation if possible.
7307static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
7308 const X86Subtarget &Subtarget,
7309 SelectionDAG &DAG) {
7310 MVT VT = BV->getSimpleValueType(0);
7311 unsigned NumElts = VT.getVectorNumElements();
7312 unsigned NumUndefsLO = 0;
7313 unsigned NumUndefsHI = 0;
7314 unsigned Half = NumElts/2;
7315
7316 // Count the number of UNDEF operands in the build_vector in input.
7317 for (unsigned i = 0, e = Half; i != e; ++i)
7318 if (BV->getOperand(i)->isUndef())
7319 NumUndefsLO++;
7320
7321 for (unsigned i = Half, e = NumElts; i != e; ++i)
7322 if (BV->getOperand(i)->isUndef())
7323 NumUndefsHI++;
7324
7325 // Early exit if this is either a build_vector of all UNDEFs or all the
7326 // operands but one are UNDEF.
7327 if (NumUndefsLO + NumUndefsHI + 1 >= NumElts)
7328 return SDValue();
7329
7330 SDLoc DL(BV);
7331 SDValue InVec0, InVec1;
7332 if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) {
7333 // Try to match an SSE3 float HADD/HSUB.
7334 if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
7335 return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
7336
7337 if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
7338 return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
7339 } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget.hasSSSE3()) {
7340 // Try to match an SSSE3 integer HADD/HSUB.
7341 if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
7342 return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1);
7343
7344 if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
7345 return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1);
7346 }
7347
7348 if (!Subtarget.hasAVX())
7349 return SDValue();
7350
7351 if ((VT == MVT::v8f32 || VT == MVT::v4f64)) {
7352 // Try to match an AVX horizontal add/sub of packed single/double
7353 // precision floating point values from 256-bit vectors.
7354 SDValue InVec2, InVec3;
7355 if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) &&
7356 isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) &&
7357 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7358 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7359 return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1);
7360
7361 if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) &&
7362 isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) &&
7363 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7364 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7365 return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1);
7366 } else if (VT == MVT::v8i32 || VT == MVT::v16i16) {
7367 // Try to match an AVX2 horizontal add/sub of signed integers.
7368 SDValue InVec2, InVec3;
7369 unsigned X86Opcode;
7370 bool CanFold = true;
7371
7372 if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
7373 isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) &&
7374 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7375 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7376 X86Opcode = X86ISD::HADD;
7377 else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) &&
7378 isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) &&
7379 ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
7380 ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
7381 X86Opcode = X86ISD::HSUB;
7382 else
7383 CanFold = false;
7384
7385 if (CanFold) {
7386 // Fold this build_vector into a single horizontal add/sub.
7387 // Do this only if the target has AVX2.
7388 if (Subtarget.hasAVX2())
7389 return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1);
7390
7391 // Do not try to expand this build_vector into a pair of horizontal
7392 // add/sub if we can emit a pair of scalar add/sub.
7393 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
7394 return SDValue();
7395
7396 // Convert this build_vector into a pair of horizontal binop followed by
7397 // a concat vector.
7398 bool isUndefLO = NumUndefsLO == Half;
7399 bool isUndefHI = NumUndefsHI == Half;
7400 return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false,
7401 isUndefLO, isUndefHI);
7402 }
7403 }
7404
7405 if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
7406 VT == MVT::v16i16) && Subtarget.hasAVX()) {
7407 unsigned X86Opcode;
7408 if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
7409 X86Opcode = X86ISD::HADD;
7410 else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1))
7411 X86Opcode = X86ISD::HSUB;
7412 else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1))
7413 X86Opcode = X86ISD::FHADD;
7414 else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1))
7415 X86Opcode = X86ISD::FHSUB;
7416 else
7417 return SDValue();
7418
7419 // Don't try to expand this build_vector into a pair of horizontal add/sub
7420 // if we can simply emit a pair of scalar add/sub.
7421 if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
7422 return SDValue();
7423
7424 // Convert this build_vector into two horizontal add/sub followed by
7425 // a concat vector.
7426 bool isUndefLO = NumUndefsLO == Half;
7427 bool isUndefHI = NumUndefsHI == Half;
7428 return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
7429 isUndefLO, isUndefHI);
7430 }
7431
7432 return SDValue();
7433}
7434
7435/// If a BUILD_VECTOR's source elements all apply the same bit operation and
7436/// one of their operands is constant, lower to a pair of BUILD_VECTOR and
7437/// just apply the bit to the vectors.
7438/// NOTE: Its not in our interest to start make a general purpose vectorizer
7439/// from this, but enough scalar bit operations are created from the later
7440/// legalization + scalarization stages to need basic support.
7441static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
7442 SelectionDAG &DAG) {
7443 SDLoc DL(Op);
7444 MVT VT = Op->getSimpleValueType(0);
7445 unsigned NumElems = VT.getVectorNumElements();
7446 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7447
7448 // Check that all elements have the same opcode.
7449 // TODO: Should we allow UNDEFS and if so how many?
7450 unsigned Opcode = Op->getOperand(0).getOpcode();
7451 for (unsigned i = 1; i < NumElems; ++i)
7452 if (Opcode != Op->getOperand(i).getOpcode())
7453 return SDValue();
7454
7455 // TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
7456 switch (Opcode) {
7457 default:
7458 return SDValue();
7459 case ISD::AND:
7460 case ISD::XOR:
7461 case ISD::OR:
7462 if (!TLI.isOperationLegalOrPromote(Opcode, VT))
7463 return SDValue();
7464 break;
7465 }
7466
7467 SmallVector<SDValue, 4> LHSElts, RHSElts;
7468 for (SDValue Elt : Op->ops()) {
7469 SDValue LHS = Elt.getOperand(0);
7470 SDValue RHS = Elt.getOperand(1);
7471
7472 // We expect the canonicalized RHS operand to be the constant.
7473 if (!isa<ConstantSDNode>(RHS))
7474 return SDValue();
7475 LHSElts.push_back(LHS);
7476 RHSElts.push_back(RHS);
7477 }
7478
7479 SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
7480 SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
7481 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7482}
7483
7484/// Create a vector constant without a load. SSE/AVX provide the bare minimum
7485/// functionality to do this, so it's all zeros, all ones, or some derivation
7486/// that is cheap to calculate.
7487static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG,
7488 const X86Subtarget &Subtarget) {
7489 SDLoc DL(Op);
7490 MVT VT = Op.getSimpleValueType();
7491
7492 // Vectors containing all zeros can be matched by pxor and xorps.
7493 if (ISD::isBuildVectorAllZeros(Op.getNode())) {
7494 // Canonicalize this to <4 x i32> to 1) ensure the zero vectors are CSE'd
7495 // and 2) ensure that i64 scalars are eliminated on x86-32 hosts.
7496 if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32)
7497 return Op;
7498
7499 return getZeroVector(VT, Subtarget, DAG, DL);
7500 }
7501
7502 // Vectors containing all ones can be matched by pcmpeqd on 128-bit width
7503 // vectors or broken into v4i32 operations on 256-bit vectors. AVX2 can use
7504 // vpcmpeqd on 256-bit vectors.
7505 if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) {
7506 if (VT == MVT::v4i32 || VT == MVT::v16i32 ||
7507 (VT == MVT::v8i32 && Subtarget.hasInt256()))
7508 return Op;
7509
7510 return getOnesVector(VT, DAG, DL);
7511 }
7512
7513 return SDValue();
7514}
7515
7516SDValue
7517X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
7518 SDLoc dl(Op);
7519
7520 MVT VT = Op.getSimpleValueType();
7521 MVT ExtVT = VT.getVectorElementType();
7522 unsigned NumElems = Op.getNumOperands();
7523
7524 // Generate vectors for predicate vectors.
7525 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
7526 return LowerBUILD_VECTORvXi1(Op, DAG);
7527
7528 if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
7529 return VectorConstant;
7530
7531 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());
7532 if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
7533 return AddSub;
7534 if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
7535 return HorizontalOp;
7536 if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
7537 return Broadcast;
7538 if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
7539 return BitOp;
7540
7541 unsigned EVTBits = ExtVT.getSizeInBits();
7542
7543 unsigned NumZero = 0;
7544 unsigned NumNonZero = 0;
7545 uint64_t NonZeros = 0;
7546 bool IsAllConstants = true;
7547 SmallSet<SDValue, 8> Values;
7548 for (unsigned i = 0; i < NumElems; ++i) {
7549 SDValue Elt = Op.getOperand(i);
7550 if (Elt.isUndef())
7551 continue;
7552 Values.insert(Elt);
7553 if (Elt.getOpcode() != ISD::Constant &&
7554 Elt.getOpcode() != ISD::ConstantFP)
7555 IsAllConstants = false;
7556 if (X86::isZeroNode(Elt))
7557 NumZero++;
7558 else {
7559 assert(i < sizeof(NonZeros) * 8)((i < sizeof(NonZeros) * 8) ? static_cast<void> (0) :
__assert_fail ("i < sizeof(NonZeros) * 8", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7559, __PRETTY_FUNCTION__))
; // Make sure the shift is within range.
7560 NonZeros |= ((uint64_t)1 << i);
7561 NumNonZero++;
7562 }
7563 }
7564
7565 // All undef vector. Return an UNDEF. All zero vectors were handled above.
7566 if (NumNonZero == 0)
7567 return DAG.getUNDEF(VT);
7568
7569 // Special case for single non-zero, non-undef, element.
7570 if (NumNonZero == 1) {
7571 unsigned Idx = countTrailingZeros(NonZeros);
7572 SDValue Item = Op.getOperand(Idx);
7573
7574 // If this is an insertion of an i64 value on x86-32, and if the top bits of
7575 // the value are obviously zero, truncate the value to i32 and do the
7576 // insertion that way. Only do this if the value is non-constant or if the
7577 // value is a constant being inserted into element 0. It is cheaper to do
7578 // a constant pool load than it is to do a movd + shuffle.
7579 if (ExtVT == MVT::i64 && !Subtarget.is64Bit() &&
7580 (!IsAllConstants || Idx == 0)) {
7581 if (DAG.MaskedValueIsZero(Item, APInt::getHighBitsSet(64, 32))) {
7582 // Handle SSE only.
7583 assert(VT == MVT::v2i64 && "Expected an SSE value type!")((VT == MVT::v2i64 && "Expected an SSE value type!") ?
static_cast<void> (0) : __assert_fail ("VT == MVT::v2i64 && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7583, __PRETTY_FUNCTION__))
;
7584 MVT VecVT = MVT::v4i32;
7585
7586 // Truncate the value (which may itself be a constant) to i32, and
7587 // convert it to a vector with movd (S2V+shuffle to zero extend).
7588 Item = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Item);
7589 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Item);
7590 return DAG.getBitcast(VT, getShuffleVectorZeroOrUndef(
7591 Item, Idx * 2, true, Subtarget, DAG));
7592 }
7593 }
7594
7595 // If we have a constant or non-constant insertion into the low element of
7596 // a vector, we can do this with SCALAR_TO_VECTOR + shuffle of zero into
7597 // the rest of the elements. This will be matched as movd/movq/movss/movsd
7598 // depending on what the source datatype is.
7599 if (Idx == 0) {
7600 if (NumZero == 0)
7601 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
7602
7603 if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
7604 (ExtVT == MVT::i64 && Subtarget.is64Bit())) {
7605 assert((VT.is128BitVector() || VT.is256BitVector() ||(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected an SSE value type!") ? static_cast<
void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7607, __PRETTY_FUNCTION__))
7606 VT.is512BitVector()) &&(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected an SSE value type!") ? static_cast<
void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7607, __PRETTY_FUNCTION__))
7607 "Expected an SSE value type!")(((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector
()) && "Expected an SSE value type!") ? static_cast<
void> (0) : __assert_fail ("(VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7607, __PRETTY_FUNCTION__))
;
7608 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
7609 // Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
7610 return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
7611 }
7612
7613 // We can't directly insert an i8 or i16 into a vector, so zero extend
7614 // it to i32 first.
7615 if (ExtVT == MVT::i16 || ExtVT == MVT::i8) {
7616 Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
7617 if (VT.getSizeInBits() >= 256) {
7618 MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
7619 if (Subtarget.hasAVX()) {
7620 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
7621 Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
7622 } else {
7623 // Without AVX, we need to extend to a 128-bit vector and then
7624 // insert into the 256-bit vector.
7625 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
7626 SDValue ZeroVec = getZeroVector(ShufVT, Subtarget, DAG, dl);
7627 Item = insert128BitVector(ZeroVec, Item, 0, DAG, dl);
7628 }
7629 } else {
7630 assert(VT.is128BitVector() && "Expected an SSE value type!")((VT.is128BitVector() && "Expected an SSE value type!"
) ? static_cast<void> (0) : __assert_fail ("VT.is128BitVector() && \"Expected an SSE value type!\""
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn303373/lib/Target/X86/X86ISelLowering.cpp"
, 7630, __PRETTY_FUNCTION__))
;
7631 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Item);
7632 Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
7633 }
7634 return DAG.getBitcast(VT, Item);
7635 }
7636 }
7637
7638 // Is it a vector logical left shift?
7639 if (NumElems == 2 && Idx == 1 &&
7640 X86::isZeroNode(Op.getOperand(0)) &&
7641 !X86::isZeroNode(Op.getOperand(1))) {
7642 unsigned NumBits = VT.getSizeInBits();
7643 return getVShift(true, VT,
7644 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
7645 VT, Op.getOperand(1)),
7646 NumBits/2, DAG, *this, dl);
7647 }
7648
7649 if (IsAllConstants) // Otherwise, it's better to do a constpool load.
7650 return SDValue();
7651
7652 // Otherwise, if this is a vector with i32 or f32 elements, and the element
7653 // is a non-constant being inserted into an element other than the low one,
7654 // we can't use a constant pool load. Instead, use SCALAR_TO_VECTOR (aka
7655 // movd/movss) to move this into the low element, then shuffle it into
7656 // place.
7657 if (EVTBits == 32) {
7658 Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
7659 return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
7660 }
7661 }
7662
7663 // Splat is obviously ok. Let legalizer expand it to a shuffle.
7664 if (Values.size() == 1) {
7665 if (EVTBits == 32) {
7666 // Instead of a shuffle like this:
7667 // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
7668 // Check if it's possible to issue this instead.
7669 // shuffle (vload ptr), undef, <1, 1, 1, 1>
7670 unsigned Idx = countTrailingZeros(NonZeros);
7671 SDValue Item = Op.getOperand(Idx);
7672 if (Op.getNode()->isOnlyUserOf(Item.getNode()))
7673 return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
7674 }
7675 return SDValue();
7676 }
7677
7678 // A vector full of immediates; various special cases are already
7679 // handled, so this is best done with a single constant-pool load.
7680 if (IsAllConstants)
7681 return SDValue();
7682
7683 // See if we can use a vector load to get all of the elements.
7684 if (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) {
7685 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
7686 if (SDValue LD = EltsFromConsecutiveLoads(VT, Ops, dl, DAG, false))
7687 return LD;
7688 }
7689
7690 // For AVX-length vectors, build the individual 128-bit pieces and use
7691 // shuffles to put them in place.
7692 if (VT.is256BitVector() || VT.is512BitVector()) {
7693 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
7694
7695 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElems/2);
7696
7697 // Build both the lower and upper subvector.
7698 SDValue Lower =
7699 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElems / 2));
7700 SDValue Upper = DAG.getBuildVector(
7701 HVT, dl, makeArrayRef(&Ops[NumElems / 2], NumElems / 2));
7702
7703 // Recreate the wider vector with the lower and upper part.
7704 if (VT.is256BitVector())
7705 return concat128BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
7706 return concat256BitVectors(Lower, Upper, VT, NumElems, DAG, dl);
7707 }
7708
7709 // Let legalizer expand 2-wide build_vectors.
7710 if (EVTBits == 64) {
7711 if (NumNonZero == 1) {
7712 // One half is zero or undef.
7713 unsigned Idx = countTrailingZeros(NonZeros);
7714 SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
7715 Op.getOperand(Idx));
7716 return</