Bug Summary

File: lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 8182, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'

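The diagnostic refers to the C and C++ rule that a shift count must be strictly less than the bit width of the (promoted) left operand, so shifting a 64-bit value by 64 is undefined behaviour. The snippet below is an editor's illustration of the general pattern and of a typical guard; it is not the code at line 8182, which lies outside the excerpt reproduced here, and the function names are invented for the example.

  // Editor's sketch of the diagnosed pattern and a common guard.
  #include <cstdint>

  // Undefined behaviour when NumBits >= 64: the shift count is greater than
  // or equal to the width of 'unsigned long long'.
  uint64_t lowMaskUnsafe(unsigned NumBits) {
    return (1ULL << NumBits) - 1;
  }

  // A typical fix keeps every shift count in the range [0, 63].
  uint64_t lowMaskGuarded(unsigned NumBits) {
    return NumBits >= 64 ? ~0ULL : (1ULL << NumBits) - 1;
  }
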
Annotated Source Code

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AArch64TargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64ISelLowering.h"
15#include "AArch64CallingConvention.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/MachineValueType.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/Intrinsics.h"
60#include "llvm/IR/Module.h"
61#include "llvm/IR/OperandTraits.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
65#include "llvm/MC/MCRegisterInfo.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/CodeGen.h"
68#include "llvm/Support/CommandLine.h"
69#include "llvm/Support/Compiler.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/ErrorHandling.h"
72#include "llvm/Support/KnownBits.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Target/TargetMachine.h"
76#include "llvm/Target/TargetOptions.h"
77#include <algorithm>
78#include <bitset>
79#include <cassert>
80#include <cctype>
81#include <cstdint>
82#include <cstdlib>
83#include <iterator>
84#include <limits>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "aarch64-lower"
92
93STATISTIC(NumTailCalls, "Number of tail calls");
94STATISTIC(NumShiftInserts, "Number of vector shift inserts");
95STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96
97static cl::opt<bool>
98EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99 cl::desc("Allow AArch64 SLI/SRI formation"),
100 cl::init(false));
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116/// Value type used for condition codes.
117static const MVT MVT_CC = MVT::i32;
118
119AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
120 const AArch64Subtarget &STI)
121 : TargetLowering(TM), Subtarget(&STI) {
122 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123 // we have to make something up. Arbitrarily, choose ZeroOrOne.
124 setBooleanContents(ZeroOrOneBooleanContent);
125 // When comparing vectors the result sets the different elements in the
126 // vector to all-one or all-zero.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // Set up the register classes.
130 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132
133 if (Subtarget->hasFPARMv8()) {
134 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138 }
139
140 if (Subtarget->hasNEON()) {
141 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143 // Someone set us up the NEON.
144 addDRTypeForNEON(MVT::v2f32);
145 addDRTypeForNEON(MVT::v8i8);
146 addDRTypeForNEON(MVT::v4i16);
147 addDRTypeForNEON(MVT::v2i32);
148 addDRTypeForNEON(MVT::v1i64);
149 addDRTypeForNEON(MVT::v1f64);
150 addDRTypeForNEON(MVT::v4f16);
151
152 addQRTypeForNEON(MVT::v4f32);
153 addQRTypeForNEON(MVT::v2f64);
154 addQRTypeForNEON(MVT::v16i8);
155 addQRTypeForNEON(MVT::v8i16);
156 addQRTypeForNEON(MVT::v4i32);
157 addQRTypeForNEON(MVT::v2i64);
158 addQRTypeForNEON(MVT::v8f16);
159 }
160
161 // Compute derived properties from the register classes
162 computeRegisterProperties(Subtarget->getRegisterInfo());
163
164 // Provide all sorts of operation actions
165 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
166 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
167 setOperationAction(ISD::SETCC, MVT::i32, Custom);
168 setOperationAction(ISD::SETCC, MVT::i64, Custom);
169 setOperationAction(ISD::SETCC, MVT::f16, Custom);
170 setOperationAction(ISD::SETCC, MVT::f32, Custom);
171 setOperationAction(ISD::SETCC, MVT::f64, Custom);
172 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
173 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
174 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
175 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
176 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
177 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
178 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
179 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
180 setOperationAction(ISD::SELECT, MVT::i32, Custom);
181 setOperationAction(ISD::SELECT, MVT::i64, Custom);
182 setOperationAction(ISD::SELECT, MVT::f16, Custom);
183 setOperationAction(ISD::SELECT, MVT::f32, Custom);
184 setOperationAction(ISD::SELECT, MVT::f64, Custom);
185 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
186 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
188 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
189 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
190 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
191 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
192
193 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
194 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
195 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
196
197 setOperationAction(ISD::FREM, MVT::f32, Expand);
198 setOperationAction(ISD::FREM, MVT::f64, Expand);
199 setOperationAction(ISD::FREM, MVT::f80, Expand);
200
201 // Custom lowering hooks are needed for XOR
202 // to fold it into CSINC/CSINV.
203 setOperationAction(ISD::XOR, MVT::i32, Custom);
204 setOperationAction(ISD::XOR, MVT::i64, Custom);
205
206 // Virtually no operation on f128 is legal, but LLVM can't expand them when
207 // there's a valid register class, so we need custom operations in most cases.
208 setOperationAction(ISD::FABS, MVT::f128, Expand);
209 setOperationAction(ISD::FADD, MVT::f128, Custom);
210 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
211 setOperationAction(ISD::FCOS, MVT::f128, Expand);
212 setOperationAction(ISD::FDIV, MVT::f128, Custom);
213 setOperationAction(ISD::FMA, MVT::f128, Expand);
214 setOperationAction(ISD::FMUL, MVT::f128, Custom);
215 setOperationAction(ISD::FNEG, MVT::f128, Expand);
216 setOperationAction(ISD::FPOW, MVT::f128, Expand);
217 setOperationAction(ISD::FREM, MVT::f128, Expand);
218 setOperationAction(ISD::FRINT, MVT::f128, Expand);
219 setOperationAction(ISD::FSIN, MVT::f128, Expand);
220 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
221 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
222 setOperationAction(ISD::FSUB, MVT::f128, Custom);
223 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
224 setOperationAction(ISD::SETCC, MVT::f128, Custom);
225 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
226 setOperationAction(ISD::SELECT, MVT::f128, Custom);
227 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
228 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
229
230 // Lowering for many of the conversions is actually specified by the non-f128
231 // type. The LowerXXX function will be trivial when f128 isn't involved.
232 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
233 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
234 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
235 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
236 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
237 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
238 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
239 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
241 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
242 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
243 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
244 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
245 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
246
247 // Variable arguments.
248 setOperationAction(ISD::VASTART, MVT::Other, Custom);
249 setOperationAction(ISD::VAARG, MVT::Other, Custom);
250 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
251 setOperationAction(ISD::VAEND, MVT::Other, Expand);
252
253 // Variable-sized objects.
254 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
255 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
256 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
257
258 // Constant pool entries
259 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
260
261 // BlockAddress
262 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
263
264 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
265 setOperationAction(ISD::ADDC, MVT::i32, Custom);
266 setOperationAction(ISD::ADDE, MVT::i32, Custom);
267 setOperationAction(ISD::SUBC, MVT::i32, Custom);
268 setOperationAction(ISD::SUBE, MVT::i32, Custom);
269 setOperationAction(ISD::ADDC, MVT::i64, Custom);
270 setOperationAction(ISD::ADDE, MVT::i64, Custom);
271 setOperationAction(ISD::SUBC, MVT::i64, Custom);
272 setOperationAction(ISD::SUBE, MVT::i64, Custom);
273
274 // AArch64 lacks both left-rotate and popcount instructions.
275 setOperationAction(ISD::ROTL, MVT::i32, Expand);
276 setOperationAction(ISD::ROTL, MVT::i64, Expand);
277 for (MVT VT : MVT::vector_valuetypes()) {
278 setOperationAction(ISD::ROTL, VT, Expand);
279 setOperationAction(ISD::ROTR, VT, Expand);
280 }
281
282 // AArch64 doesn't have {U|S}MUL_LOHI.
283 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
284 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
285
286 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
287 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
288
289 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
290 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
291 for (MVT VT : MVT::vector_valuetypes()) {
292 setOperationAction(ISD::SDIVREM, VT, Expand);
293 setOperationAction(ISD::UDIVREM, VT, Expand);
294 }
295 setOperationAction(ISD::SREM, MVT::i32, Expand);
296 setOperationAction(ISD::SREM, MVT::i64, Expand);
297 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
298 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
299 setOperationAction(ISD::UREM, MVT::i32, Expand);
300 setOperationAction(ISD::UREM, MVT::i64, Expand);
301
302 // Custom lower Add/Sub/Mul with overflow.
303 setOperationAction(ISD::SADDO, MVT::i32, Custom);
304 setOperationAction(ISD::SADDO, MVT::i64, Custom);
305 setOperationAction(ISD::UADDO, MVT::i32, Custom);
306 setOperationAction(ISD::UADDO, MVT::i64, Custom);
307 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
308 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
309 setOperationAction(ISD::USUBO, MVT::i32, Custom);
310 setOperationAction(ISD::USUBO, MVT::i64, Custom);
311 setOperationAction(ISD::SMULO, MVT::i32, Custom);
312 setOperationAction(ISD::SMULO, MVT::i64, Custom);
313 setOperationAction(ISD::UMULO, MVT::i32, Custom);
314 setOperationAction(ISD::UMULO, MVT::i64, Custom);
315
316 setOperationAction(ISD::FSIN, MVT::f32, Expand);
317 setOperationAction(ISD::FSIN, MVT::f64, Expand);
318 setOperationAction(ISD::FCOS, MVT::f32, Expand);
319 setOperationAction(ISD::FCOS, MVT::f64, Expand);
320 setOperationAction(ISD::FPOW, MVT::f32, Expand);
321 setOperationAction(ISD::FPOW, MVT::f64, Expand);
322 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
323 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
324 if (Subtarget->hasFullFP16())
325 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
326 else
327 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
328
329 setOperationAction(ISD::FREM, MVT::f16, Promote);
330 setOperationAction(ISD::FREM, MVT::v4f16, Promote);
331 setOperationAction(ISD::FREM, MVT::v8f16, Promote);
332 setOperationAction(ISD::FPOW, MVT::f16, Promote);
333 setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
334 setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
335 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
336 setOperationAction(ISD::FCOS, MVT::f16, Promote);
337 setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
338 setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
339 setOperationAction(ISD::FSIN, MVT::f16, Promote);
340 setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
341 setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
342 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
343 setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
344 setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
345 setOperationAction(ISD::FEXP, MVT::f16, Promote);
346 setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
347 setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
348 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
349 setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
350 setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
351 setOperationAction(ISD::FLOG, MVT::f16, Promote);
352 setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
353 setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
354 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
355 setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
356 setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
357 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
358 setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
359 setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
360
361 if (!Subtarget->hasFullFP16()) {
362 setOperationAction(ISD::SELECT, MVT::f16, Promote);
363 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
364 setOperationAction(ISD::SETCC, MVT::f16, Promote);
365 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
366 setOperationAction(ISD::FADD, MVT::f16, Promote);
367 setOperationAction(ISD::FSUB, MVT::f16, Promote);
368 setOperationAction(ISD::FMUL, MVT::f16, Promote);
369 setOperationAction(ISD::FDIV, MVT::f16, Promote);
370 setOperationAction(ISD::FMA, MVT::f16, Promote);
371 setOperationAction(ISD::FNEG, MVT::f16, Promote);
372 setOperationAction(ISD::FABS, MVT::f16, Promote);
373 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
374 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
375 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
376 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
377 setOperationAction(ISD::FRINT, MVT::f16, Promote);
378 setOperationAction(ISD::FROUND, MVT::f16, Promote);
379 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
380 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
381 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
382 setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
383 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
384
385 // promote v4f16 to v4f32 when that is known to be safe.
386 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
387 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
388 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
389 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
390 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
391 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
392 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
393 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
394 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
395 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
396 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
397 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
398
399 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
400 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
401 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
402 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
403 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
404 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
405 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
406 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
407 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
408 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
409 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
410 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
411 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
412 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
413 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
414
415 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
416 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
417 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
418 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
419 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
420 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
421 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
422 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
423 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
424 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
425 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
426 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
427 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
428 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
429 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
430 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
431 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
432 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
433 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
434 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
435 }
436
437 // AArch64 has implementations of a lot of rounding-like FP operations.
438 for (MVT Ty : {MVT::f32, MVT::f64}) {
439 setOperationAction(ISD::FFLOOR, Ty, Legal);
440 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
441 setOperationAction(ISD::FCEIL, Ty, Legal);
442 setOperationAction(ISD::FRINT, Ty, Legal);
443 setOperationAction(ISD::FTRUNC, Ty, Legal);
444 setOperationAction(ISD::FROUND, Ty, Legal);
445 setOperationAction(ISD::FMINNUM, Ty, Legal);
446 setOperationAction(ISD::FMAXNUM, Ty, Legal);
447 setOperationAction(ISD::FMINNAN, Ty, Legal);
448 setOperationAction(ISD::FMAXNAN, Ty, Legal);
449 }
450
451 if (Subtarget->hasFullFP16()) {
452 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
453 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
454 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
455 setOperationAction(ISD::FRINT, MVT::f16, Legal);
456 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
457 setOperationAction(ISD::FROUND, MVT::f16, Legal);
458 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
459 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
460 setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
461 setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
462 }
463
464 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
465
466 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
467
468 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
469 // This requires the Performance Monitors extension.
470 if (Subtarget->hasPerfMon())
471 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
472
473 if (Subtarget->isTargetMachO()) {
474 // For iOS, we don't want the normal expansion of a libcall to
475 // sincos. We want to issue a libcall to __sincos_stret to avoid memory
476 // traffic.
477 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
478 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
479 } else {
480 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
481 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
482 }
483
484 // Make floating-point constants legal for the large code model, so they don't
485 // become loads from the constant pool.
486 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
487 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
488 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
489 }
490
491 // AArch64 does not have floating-point extending loads, i1 sign-extending
492 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
493 for (MVT VT : MVT::fp_valuetypes()) {
494 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
495 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
496 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
497 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
498 }
499 for (MVT VT : MVT::integer_valuetypes())
500 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
501
502 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
503 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
504 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
505 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
506 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
507 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
508 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
509
510 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
511 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
512
513 // Indexed loads and stores are supported.
514 for (unsigned im = (unsigned)ISD::PRE_INC;
515 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
516 setIndexedLoadAction(im, MVT::i8, Legal);
517 setIndexedLoadAction(im, MVT::i16, Legal);
518 setIndexedLoadAction(im, MVT::i32, Legal);
519 setIndexedLoadAction(im, MVT::i64, Legal);
520 setIndexedLoadAction(im, MVT::f64, Legal);
521 setIndexedLoadAction(im, MVT::f32, Legal);
522 setIndexedLoadAction(im, MVT::f16, Legal);
523 setIndexedStoreAction(im, MVT::i8, Legal);
524 setIndexedStoreAction(im, MVT::i16, Legal);
525 setIndexedStoreAction(im, MVT::i32, Legal);
526 setIndexedStoreAction(im, MVT::i64, Legal);
527 setIndexedStoreAction(im, MVT::f64, Legal);
528 setIndexedStoreAction(im, MVT::f32, Legal);
529 setIndexedStoreAction(im, MVT::f16, Legal);
530 }
531
532 // Trap.
533 setOperationAction(ISD::TRAP, MVT::Other, Legal);
534
535 // We combine OR nodes for bitfield operations.
536 setTargetDAGCombine(ISD::OR);
537
538 // Vector add and sub nodes may conceal a high-half opportunity.
539 // Also, try to fold ADD into CSINC/CSINV.
540 setTargetDAGCombine(ISD::ADD);
541 setTargetDAGCombine(ISD::SUB);
542 setTargetDAGCombine(ISD::SRL);
543 setTargetDAGCombine(ISD::XOR);
544 setTargetDAGCombine(ISD::SINT_TO_FP);
545 setTargetDAGCombine(ISD::UINT_TO_FP);
546
547 setTargetDAGCombine(ISD::FP_TO_SINT);
548 setTargetDAGCombine(ISD::FP_TO_UINT);
549 setTargetDAGCombine(ISD::FDIV);
550
551 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
552
553 setTargetDAGCombine(ISD::ANY_EXTEND);
554 setTargetDAGCombine(ISD::ZERO_EXTEND);
555 setTargetDAGCombine(ISD::SIGN_EXTEND);
556 setTargetDAGCombine(ISD::BITCAST);
557 setTargetDAGCombine(ISD::CONCAT_VECTORS);
558 setTargetDAGCombine(ISD::STORE);
559 if (Subtarget->supportsAddressTopByteIgnored())
560 setTargetDAGCombine(ISD::LOAD);
561
562 setTargetDAGCombine(ISD::MUL);
563
564 setTargetDAGCombine(ISD::SELECT);
565 setTargetDAGCombine(ISD::VSELECT);
566
567 setTargetDAGCombine(ISD::INTRINSIC_VOID);
568 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
569 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
570
571 MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
572 MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
573 MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
574
575 setStackPointerRegisterToSaveRestore(AArch64::SP);
576
577 setSchedulingPreference(Sched::Hybrid);
578
579 EnableExtLdPromotion = true;
580
581 // Set required alignment.
582 setMinFunctionAlignment(2);
583 // Set preferred alignments.
584 setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
585 setPrefLoopAlignment(STI.getPrefLoopAlignment());
586
587 // Only change the limit for entries in a jump table if specified by
588 // the subtarget, but not at the command line.
589 unsigned MaxJT = STI.getMaximumJumpTableSize();
590 if (MaxJT && getMaximumJumpTableSize() == 0)
591 setMaximumJumpTableSize(MaxJT);
592
593 setHasExtractBitsInsn(true);
594
595 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
596
597 if (Subtarget->hasNEON()) {
598 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
599 // silliness like this:
600 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
601 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
602 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
603 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
604 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
605 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
606 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
607 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
608 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
609 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
610 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
611 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
612 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
613 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
614 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
615 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
616 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
617 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
618 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
619 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
620 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
621 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
622 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
623 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
624 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
625
626 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
627 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
628 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
629 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
630 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
631
632 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
633
634 // AArch64 doesn't have direct vector ->f32 conversion instructions for
635 // elements smaller than i32, so promote the input to i32 first.
636 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
637 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
638 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
639 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
640 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
641 // -> v8f16 conversions.
642 setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
643 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
644 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
645 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
646 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
647 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
648 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
649 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
650 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
651 // Or, direct i32 -> f16 vector conversion. Set it to custom, so the
652 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
653 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
654 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
655
656 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
657 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
658
659 setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
660 setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
661 setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
662 setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
663 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
664 setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
665 setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
666 setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
667
668 // AArch64 doesn't have MUL.2d:
669 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
670 // Custom handling for some quad-vector types to detect MULL.
671 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
672 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
673 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
674
675 // Vector reductions
676 for (MVT VT : MVT::integer_valuetypes()) {
677 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
678 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
679 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
680 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
681 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
682 }
683 for (MVT VT : MVT::fp_valuetypes()) {
684 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
685 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
686 }
687
688 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
689 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
690 // Likewise, narrowing and extending vector loads/stores aren't handled
691 // directly.
692 for (MVT VT : MVT::vector_valuetypes()) {
693 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
694
695 setOperationAction(ISD::MULHS, VT, Expand);
696 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
697 setOperationAction(ISD::MULHU, VT, Expand);
698 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
699
700 setOperationAction(ISD::BSWAP, VT, Expand);
701
702 for (MVT InnerVT : MVT::vector_valuetypes()) {
703 setTruncStoreAction(VT, InnerVT, Expand);
704 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
705 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
706 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
707 }
708 }
709
710 // AArch64 has implementations of a lot of rounding-like FP operations.
711 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
712 setOperationAction(ISD::FFLOOR, Ty, Legal);
713 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
714 setOperationAction(ISD::FCEIL, Ty, Legal);
715 setOperationAction(ISD::FRINT, Ty, Legal);
716 setOperationAction(ISD::FTRUNC, Ty, Legal);
717 setOperationAction(ISD::FROUND, Ty, Legal);
718 }
719 }
720
721 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
722}
723
724void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
725 if (VT == MVT::v2f32 || VT == MVT::v4f16) {
726 setOperationAction(ISD::LOAD, VT, Promote);
727 AddPromotedToType(ISD::LOAD, VT, MVT::v2i32);
728
729 setOperationAction(ISD::STORE, VT, Promote);
730 AddPromotedToType(ISD::STORE, VT, MVT::v2i32);
731 } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
732 setOperationAction(ISD::LOAD, VT, Promote);
733 AddPromotedToType(ISD::LOAD, VT, MVT::v2i64);
734
735 setOperationAction(ISD::STORE, VT, Promote);
736 AddPromotedToType(ISD::STORE, VT, MVT::v2i64);
737 }
738
739 // Mark vector float intrinsics as expand.
740 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
741 setOperationAction(ISD::FSIN, VT, Expand);
742 setOperationAction(ISD::FCOS, VT, Expand);
743 setOperationAction(ISD::FPOW, VT, Expand);
744 setOperationAction(ISD::FLOG, VT, Expand);
745 setOperationAction(ISD::FLOG2, VT, Expand);
746 setOperationAction(ISD::FLOG10, VT, Expand);
747 setOperationAction(ISD::FEXP, VT, Expand);
748 setOperationAction(ISD::FEXP2, VT, Expand);
749
750 // But we do support custom-lowering for FCOPYSIGN.
751 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
752 }
753
754 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
755 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
756 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
757 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
758 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
759 setOperationAction(ISD::SRA, VT, Custom);
760 setOperationAction(ISD::SRL, VT, Custom);
761 setOperationAction(ISD::SHL, VT, Custom);
762 setOperationAction(ISD::AND, VT, Custom);
763 setOperationAction(ISD::OR, VT, Custom);
764 setOperationAction(ISD::SETCC, VT, Custom);
765 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
766
767 setOperationAction(ISD::SELECT, VT, Expand);
768 setOperationAction(ISD::SELECT_CC, VT, Expand);
769 setOperationAction(ISD::VSELECT, VT, Expand);
770 for (MVT InnerVT : MVT::all_valuetypes())
771 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
772
773 // CNT supports only B element sizes.
774 if (VT != MVT::v8i8 && VT != MVT::v16i8)
775 setOperationAction(ISD::CTPOP, VT, Expand);
776
777 setOperationAction(ISD::UDIV, VT, Expand);
778 setOperationAction(ISD::SDIV, VT, Expand);
779 setOperationAction(ISD::UREM, VT, Expand);
780 setOperationAction(ISD::SREM, VT, Expand);
781 setOperationAction(ISD::FREM, VT, Expand);
782
783 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
784 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
785
786 if (!VT.isFloatingPoint())
787 setOperationAction(ISD::ABS, VT, Legal);
788
789 // [SU][MIN|MAX] are available for all NEON types apart from i64.
790 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
791 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
792 setOperationAction(Opcode, VT, Legal);
793
794 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
795 if (VT.isFloatingPoint() &&
796 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
797 for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
798 ISD::FMINNUM, ISD::FMAXNUM})
799 setOperationAction(Opcode, VT, Legal);
800
801 if (Subtarget->isLittleEndian()) {
802 for (unsigned im = (unsigned)ISD::PRE_INC;
803 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
804 setIndexedLoadAction(im, VT, Legal);
805 setIndexedStoreAction(im, VT, Legal);
806 }
807 }
808}
809
810void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
811 addRegisterClass(VT, &AArch64::FPR64RegClass);
812 addTypeForNEON(VT, MVT::v2i32);
813}
814
815void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
816 addRegisterClass(VT, &AArch64::FPR128RegClass);
817 addTypeForNEON(VT, MVT::v4i32);
818}
819
820EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
821 EVT VT) const {
822 if (!VT.isVector())
823 return MVT::i32;
824 return VT.changeVectorElementTypeToInteger();
825}
826
827static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
828 const APInt &Demanded,
829 TargetLowering::TargetLoweringOpt &TLO,
830 unsigned NewOpc) {
831 uint64_t OldImm = Imm, NewImm, Enc;
832 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
833
834 // Return if the immediate is already all zeros, all ones, a bimm32 or a
835 // bimm64.
836 if (Imm == 0 || Imm == Mask ||
837 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
838 return false;
839
840 unsigned EltSize = Size;
841 uint64_t DemandedBits = Demanded.getZExtValue();
842
843 // Clear bits that are not demanded.
844 Imm &= DemandedBits;
845
846 while (true) {
847 // The goal here is to set the non-demanded bits in a way that minimizes
848 // the number of switching between 0 and 1. In order to achieve this goal,
849 // we set the non-demanded bits to the value of the preceding demanded bits.
850 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
851 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
852 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
853 // The final result is 0b11000011.
854 uint64_t NonDemandedBits = ~DemandedBits;
855 uint64_t InvertedImm = ~Imm & DemandedBits;
856 uint64_t RotatedImm =
857 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
858 NonDemandedBits;
859 uint64_t Sum = RotatedImm + NonDemandedBits;
860 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
861 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
862 NewImm = (Imm | Ones) & Mask;
863
864 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
865 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
866 // we halve the element size and continue the search.
867 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
868 break;
869
870 // We cannot shrink the element size any further if it is 2-bits.
871 if (EltSize == 2)
872 return false;
873
874 EltSize /= 2;
875 Mask >>= EltSize;
876 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
877
878 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
879 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
880 return false;
881
882 // Merge the upper and lower halves of Imm and DemandedBits.
883 Imm |= Hi;
884 DemandedBits |= DemandedBitsHi;
885 }
886
887 ++NumOptimizedImms;
888
889 // Replicate the element across the register width.
890 while (EltSize < Size) {
891 NewImm |= NewImm << EltSize;
892 EltSize *= 2;
893 }
894
895 (void)OldImm;
896 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
897 "demanded bits should never be altered");
898 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
899
900 // Create the new constant immediate node.
901 EVT VT = Op.getValueType();
902 SDLoc DL(Op);
903 SDValue New;
904
905 // If the new constant immediate is all-zeros or all-ones, let the target
906 // independent DAG combine optimize this node.
907 if (NewImm == 0 || NewImm == OrigMask) {
908 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
909 TLO.DAG.getConstant(NewImm, DL, VT));
910 // Otherwise, create a machine node so that target independent DAG combine
911 // doesn't undo this optimization.
912 } else {
913 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
914 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
915 New = SDValue(
916 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
917 }
918
919 return TLO.CombineTo(Op, New);
920}
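
Editor's note on the loop above: the comment at source lines 847-853 describes how each run of non-demanded bits is filled with the value of the nearest demanded bit below it. The following self-contained sketch, which is not part of the analyzed source, replays that arithmetic on the comment's own 8-bit example (0bx10xx0x1) with the values confined to the low 8 bits, and checks that it produces 0b11000011. The variable names mirror the source; the 8-bit element size is chosen purely for illustration.

  // Editor's sketch: the bit-filling arithmetic from the loop body, applied
  // to the 8-bit example in the comment (not part of the analyzed source).
  #include <cassert>
  #include <cstdint>
  #include <cstdio>

  int main() {
    const unsigned EltSize = 8;
    const uint64_t Mask = 0xFF;          // 8-bit element, for illustration only
    const uint64_t DemandedBits = 0x65;  // bits 0, 2, 5 and 6 are demanded
    const uint64_t Imm = 0x41;           // demanded values: bit6=1, bit5=0, bit2=0, bit0=1

    uint64_t NonDemandedBits = ~DemandedBits & Mask;
    uint64_t InvertedImm = ~Imm & DemandedBits;
    uint64_t RotatedImm =
        ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) & NonDemandedBits;
    uint64_t Sum = RotatedImm + NonDemandedBits;
    bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
    uint64_t Ones = (Sum + Carry) & NonDemandedBits;
    uint64_t NewImm = (Imm | Ones) & Mask;

    assert(NewImm == 0xC3 && "expected 0b11000011, as the comment predicts");
    std::printf("NewImm = 0x%02llx\n", (unsigned long long)NewImm);
    return 0;
  }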
921
922bool AArch64TargetLowering::targetShrinkDemandedConstant(
923 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
924 // Delay this optimization to as late as possible.
925 if (!TLO.LegalOps)
926 return false;
927
928 if (!EnableOptimizeLogicalImm)
929 return false;
930
931 EVT VT = Op.getValueType();
932 if (VT.isVector())
933 return false;
934
935 unsigned Size = VT.getSizeInBits();
936 assert((Size == 32 || Size == 64) &&
937 "i32 or i64 is expected after legalization.");
938
939 // Exit early if we demand all bits.
940 if (Demanded.countPopulation() == Size)
941 return false;
942
943 unsigned NewOpc;
944 switch (Op.getOpcode()) {
945 default:
946 return false;
947 case ISD::AND:
948 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
949 break;
950 case ISD::OR:
951 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
952 break;
953 case ISD::XOR:
954 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
955 break;
956 }
957 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
958 if (!C)
959 return false;
960 uint64_t Imm = C->getZExtValue();
961 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
962}
963
964/// computeKnownBitsForTargetNode - Determine which of the bits specified in
965/// Mask are known to be either zero or one and return them in Known.
966void AArch64TargetLowering::computeKnownBitsForTargetNode(
967 const SDValue Op, KnownBits &Known,
968 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
969 switch (Op.getOpcode()) {
970 default:
971 break;
972 case AArch64ISD::CSEL: {
973 KnownBits Known2;
974 DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
975 DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
976 Known.Zero &= Known2.Zero;
977 Known.One &= Known2.One;
978 break;
979 }
980 case ISD::INTRINSIC_W_CHAIN: {
981 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
982 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
983 switch (IntID) {
984 default: return;
985 case Intrinsic::aarch64_ldaxr:
986 case Intrinsic::aarch64_ldxr: {
987 unsigned BitWidth = Known.getBitWidth();
988 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
989 unsigned MemBits = VT.getScalarSizeInBits();
990 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
991 return;
992 }
993 }
994 break;
995 }
996 case ISD::INTRINSIC_WO_CHAIN:
997 case ISD::INTRINSIC_VOID: {
998 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
999 switch (IntNo) {
1000 default:
1001 break;
1002 case Intrinsic::aarch64_neon_umaxv:
1003 case Intrinsic::aarch64_neon_uminv: {
1004 // Figure out the datatype of the vector operand. The UMINV instruction
1005 // will zero extend the result, so we can mark as known zero all the
1006 // bits larger than the element datatype. 32-bit or larget doesn't need
1007 // this as those are legal types and will be handled by isel directly.
1008 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1009 unsigned BitWidth = Known.getBitWidth();
1010 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1011 assert(BitWidth >= 8 && "Unexpected width!");
1012 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1013 Known.Zero |= Mask;
1014 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1015 assert(BitWidth >= 16 && "Unexpected width!");
1016 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1017 Known.Zero |= Mask;
1018 }
1019 break;
1020 } break;
1021 }
1022 }
1023 }
1024}
1025
1026MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1027 EVT) const {
1028 return MVT::i64;
1029}
1030
1031bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1032 unsigned AddrSpace,
1033 unsigned Align,
1034 bool *Fast) const {
1035 if (Subtarget->requiresStrictAlign())
1036 return false;
1037
1038 if (Fast) {
1039 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1040 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1041 // See comments in performSTORECombine() for more details about
1042 // these conditions.
1043
1044 // Code that uses clang vector extensions can mark that it
1045 // wants unaligned accesses to be treated as fast by
1046 // underspecifying alignment to be 1 or 2.
1047 Align <= 2 ||
1048
1049 // Disregard v2i64. Memcpy lowering produces those and splitting
1050 // them regresses performance on micro-benchmarks and olden/bh.
1051 VT == MVT::v2i64;
1052 }
1053 return true;
1054}
1055
1056FastISel *
1057AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1058 const TargetLibraryInfo *libInfo) const {
1059 return AArch64::createFastISel(funcInfo, libInfo);
1060}
1061
1062const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1063 switch ((AArch64ISD::NodeType)Opcode) {
1064 case AArch64ISD::FIRST_NUMBER: break;
1065 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1066 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1067 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1068 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1069 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1070 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1071 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1072 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1073 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1074 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1075 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1076 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1077 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1078 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1079 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1080 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1081 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1082 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1083 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1084 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1085 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1086 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1087 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1088 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1089 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1090 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1091 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1092 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1093 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1094 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1095 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1096 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1097 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1098 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1099 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1100 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1101 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1102 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1103 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1104 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1105 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1106 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1107 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1108 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1109 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1110 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1111 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1112 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1113 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1114 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1115 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1116 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1117 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1118 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1119 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1120 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1121 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1122 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1123 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1124 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1125 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1126 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1127 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1128 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1129 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1130 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1131 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1132 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1133 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1134 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1135 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1136 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1137 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1138 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1139 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1140 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1141 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1142 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1143 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1144 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1145 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1146 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1147 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1148 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1149 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1150 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1151 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1152 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1153 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1154 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1155 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1156 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1157 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1158 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1159 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1160 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1161 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1162 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1163 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1164 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1165 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1166 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1167 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1168 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1169 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1170 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1171 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1172 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1173 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1174 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1175 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1176 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1177 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1178 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1179 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1180 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1181 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1182 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1183 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1184 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1185 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1186 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1187 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1188 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1189 }
1190 return nullptr;
1191}
1192
1193MachineBasicBlock *
1194AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1195 MachineBasicBlock *MBB) const {
1196 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1197 // phi node:
1198
1199 // OrigBB:
1200 // [... previous instrs leading to comparison ...]
1201 // b.ne TrueBB
1202 // b EndBB
1203 // TrueBB:
1204 // ; Fallthrough
1205 // EndBB:
1206 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1207
1208 MachineFunction *MF = MBB->getParent();
1209 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1210 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1211 DebugLoc DL = MI.getDebugLoc();
1212 MachineFunction::iterator It = ++MBB->getIterator();
1213
1214 unsigned DestReg = MI.getOperand(0).getReg();
1215 unsigned IfTrueReg = MI.getOperand(1).getReg();
1216 unsigned IfFalseReg = MI.getOperand(2).getReg();
1217 unsigned CondCode = MI.getOperand(3).getImm();
1218 bool NZCVKilled = MI.getOperand(4).isKill();
1219
1220 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1221 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1222 MF->insert(It, TrueBB);
1223 MF->insert(It, EndBB);
1224
1225 // Transfer rest of current basic-block to EndBB
1226 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1227 MBB->end());
1228 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1229
1230 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1231 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1232 MBB->addSuccessor(TrueBB);
1233 MBB->addSuccessor(EndBB);
1234
1235 // TrueBB falls through to the end.
1236 TrueBB->addSuccessor(EndBB);
1237
1238 if (!NZCVKilled) {
1239 TrueBB->addLiveIn(AArch64::NZCV);
1240 EndBB->addLiveIn(AArch64::NZCV);
1241 }
1242
1243 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1244 .addReg(IfTrueReg)
1245 .addMBB(TrueBB)
1246 .addReg(IfFalseReg)
1247 .addMBB(MBB);
1248
1249 MI.eraseFromParent();
1250 return EndBB;
1251}
1252
1253MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1254 MachineInstr &MI, MachineBasicBlock *BB) const {
1255 switch (MI.getOpcode()) {
1256 default:
1257#ifndef NDEBUG
1258 MI.dump();
1259#endif
1260    llvm_unreachable("Unexpected instruction for custom inserter!");
1261
1262 case AArch64::F128CSEL:
1263 return EmitF128CSEL(MI, BB);
1264
1265 case TargetOpcode::STACKMAP:
1266 case TargetOpcode::PATCHPOINT:
1267 return emitPatchPoint(MI, BB);
1268 }
1269}
1270
1271//===----------------------------------------------------------------------===//
1272// AArch64 Lowering private implementation.
1273//===----------------------------------------------------------------------===//
1274
1275//===----------------------------------------------------------------------===//
1276// Lowering Code
1277//===----------------------------------------------------------------------===//
1278
1279/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1280/// CC
1281static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1282 switch (CC) {
1283 default:
1284    llvm_unreachable("Unknown condition code!");
1285 case ISD::SETNE:
1286 return AArch64CC::NE;
1287 case ISD::SETEQ:
1288 return AArch64CC::EQ;
1289 case ISD::SETGT:
1290 return AArch64CC::GT;
1291 case ISD::SETGE:
1292 return AArch64CC::GE;
1293 case ISD::SETLT:
1294 return AArch64CC::LT;
1295 case ISD::SETLE:
1296 return AArch64CC::LE;
1297 case ISD::SETUGT:
1298 return AArch64CC::HI;
1299 case ISD::SETUGE:
1300 return AArch64CC::HS;
1301 case ISD::SETULT:
1302 return AArch64CC::LO;
1303 case ISD::SETULE:
1304 return AArch64CC::LS;
1305 }
1306}
1307
1308/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1309static void changeFPCCToAArch64CC(ISD::CondCode CC,
1310 AArch64CC::CondCode &CondCode,
1311 AArch64CC::CondCode &CondCode2) {
1312 CondCode2 = AArch64CC::AL;
1313 switch (CC) {
1314 default:
1315    llvm_unreachable("Unknown FP condition!");
1316 case ISD::SETEQ:
1317 case ISD::SETOEQ:
1318 CondCode = AArch64CC::EQ;
1319 break;
1320 case ISD::SETGT:
1321 case ISD::SETOGT:
1322 CondCode = AArch64CC::GT;
1323 break;
1324 case ISD::SETGE:
1325 case ISD::SETOGE:
1326 CondCode = AArch64CC::GE;
1327 break;
1328 case ISD::SETOLT:
1329 CondCode = AArch64CC::MI;
1330 break;
1331 case ISD::SETOLE:
1332 CondCode = AArch64CC::LS;
1333 break;
1334 case ISD::SETONE:
1335 CondCode = AArch64CC::MI;
1336 CondCode2 = AArch64CC::GT;
1337 break;
1338 case ISD::SETO:
1339 CondCode = AArch64CC::VC;
1340 break;
1341 case ISD::SETUO:
1342 CondCode = AArch64CC::VS;
1343 break;
1344 case ISD::SETUEQ:
1345 CondCode = AArch64CC::EQ;
1346 CondCode2 = AArch64CC::VS;
1347 break;
1348 case ISD::SETUGT:
1349 CondCode = AArch64CC::HI;
1350 break;
1351 case ISD::SETUGE:
1352 CondCode = AArch64CC::PL;
1353 break;
1354 case ISD::SETLT:
1355 case ISD::SETULT:
1356 CondCode = AArch64CC::LT;
1357 break;
1358 case ISD::SETLE:
1359 case ISD::SETULE:
1360 CondCode = AArch64CC::LE;
1361 break;
1362 case ISD::SETNE:
1363 case ISD::SETUNE:
1364 CondCode = AArch64CC::NE;
1365 break;
1366 }
1367}
1368
1369/// Convert a DAG fp condition code to an AArch64 CC.
1370/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1371/// should be AND'ed instead of OR'ed.
1372static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1373 AArch64CC::CondCode &CondCode,
1374 AArch64CC::CondCode &CondCode2) {
1375 CondCode2 = AArch64CC::AL;
1376 switch (CC) {
1377 default:
1378 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1379    assert(CondCode2 == AArch64CC::AL);
1380 break;
1381 case ISD::SETONE:
1382 // (a one b)
1383 // == ((a olt b) || (a ogt b))
1384 // == ((a ord b) && (a une b))
1385 CondCode = AArch64CC::VC;
1386 CondCode2 = AArch64CC::NE;
1387 break;
1388 case ISD::SETUEQ:
1389 // (a ueq b)
1390 // == ((a uno b) || (a oeq b))
1391 // == ((a ule b) && (a uge b))
1392 CondCode = AArch64CC::PL;
1393 CondCode2 = AArch64CC::LE;
1394 break;
1395 }
1396}
1397
1398/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1399/// CC usable with the vector instructions. Fewer operations are available
1400/// without a real NZCV register, so we have to use less efficient combinations
1401/// to get the same effect.
1402static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1403 AArch64CC::CondCode &CondCode,
1404 AArch64CC::CondCode &CondCode2,
1405 bool &Invert) {
1406 Invert = false;
1407 switch (CC) {
1408 default:
1409 // Mostly the scalar mappings work fine.
1410 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1411 break;
1412 case ISD::SETUO:
1413 Invert = true;
1414    LLVM_FALLTHROUGH;
1415 case ISD::SETO:
1416 CondCode = AArch64CC::MI;
1417 CondCode2 = AArch64CC::GE;
1418 break;
1419 case ISD::SETUEQ:
1420 case ISD::SETULT:
1421 case ISD::SETULE:
1422 case ISD::SETUGT:
1423 case ISD::SETUGE:
1424 // All of the compare-mask comparisons are ordered, but we can switch
1425 // between the two by a double inversion. E.g. ULE == !OGT.
1426 Invert = true;
1427 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1428 break;
1429 }
1430}
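
A minimal standalone sketch (not part of the LLVM source) of the double-inversion identity relied on above for the compare-mask lowering: an unordered predicate equals the negation of the opposite ordered predicate, e.g. (a ule b) == !(a ogt b), including when NaNs are involved.

#include <cassert>
#include <cmath>

int main() {
  double vals[] = {1.0, 2.0, NAN};
  for (double a : vals)
    for (double b : vals) {
      bool ogt = a > b;                                        // ordered greater-than: false if either side is NaN
      bool ule = (a <= b) || std::isnan(a) || std::isnan(b);   // unordered less-or-equal
      assert(ule == !ogt);                                     // ULE == !OGT
    }
}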
1431
1432static bool isLegalArithImmed(uint64_t C) {
1433 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1434 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1435  DEBUG(dbgs() << "Is imm " << C << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1436 return IsLegal;
1437}
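
A standalone sketch (not part of the LLVM source) of the same arithmetic-immediate rule: a legal ADD/SUB/CMP immediate is a 12-bit value, optionally shifted left by 12 bits. The helper name here is invented for illustration.

#include <cstdint>
#include <cstdio>

static bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12) == 0 ||                         // 0..4095, unshifted form
         ((C & 0xFFFULL) == 0 && (C >> 24) == 0);  // multiples of 4096 up to 0xFFF000, shifted form
}

int main() {
  std::printf("%d %d %d\n",
              isLegalArithImmedSketch(4095),     // 1: fits in 12 bits
              isLegalArithImmedSketch(0x1000),   // 1: 1 << 12, encodable with the shift
              isLegalArithImmedSketch(0x1001));  // 0: needs both halves, not encodable
}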
1438
1439static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1440 const SDLoc &dl, SelectionDAG &DAG) {
1441 EVT VT = LHS.getValueType();
1442 const bool FullFP16 =
1443 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1444
1445 if (VT.isFloatingPoint()) {
1446    assert(VT != MVT::f128);
1447 if (VT == MVT::f16 && !FullFP16) {
1448 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1449 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1450 VT = MVT::f32;
1451 }
1452 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1453 }
1454
1455 // The CMP instruction is just an alias for SUBS, and representing it as
1456 // SUBS means that it's possible to get CSE with subtract operations.
1457 // A later phase can perform the optimization of setting the destination
1458 // register to WZR/XZR if it ends up being unused.
1459 unsigned Opcode = AArch64ISD::SUBS;
1460
1461 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
1462 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1463    // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
1464 // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1465 // can be set differently by this operation. It comes down to whether
1466 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1467 // everything is fine. If not then the optimization is wrong. Thus general
1468 // comparisons are only valid if op2 != 0.
1469
1470 // So, finally, the only LLVM-native comparisons that don't mention C and V
1471 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1472 // the absence of information about op2.
1473 Opcode = AArch64ISD::ADDS;
1474 RHS = RHS.getOperand(1);
1475 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1476 !isUnsignedIntSetCC(CC)) {
1477 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1478 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1479 // of the signed comparisons.
1480 Opcode = AArch64ISD::ANDS;
1481 RHS = LHS.getOperand(1);
1482 LHS = LHS.getOperand(0);
1483 }
1484
1485 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1486 .getValue(1);
1487}
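
A hedged standalone illustration (not LLVM code) of the restriction discussed in the comment above: "a - (-b) == 0" and "a + b == 0" always agree, so the CMP-to-CMN rewrite is safe for SETEQ/SETNE, while the C and V flags (and hence the other predicates) are not guaranteed to match.

#include <cassert>

int main() {
  // Spot-check the EQ/NE identity on a few values (avoiding INT64_MIN, where
  // negating b would overflow).
  for (long long a : {0LL, 1LL, -5LL, 42LL})
    for (long long b : {0LL, 7LL, -7LL, -42LL})
      assert(((a - (-b)) == 0) == ((a + b) == 0));
}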
1488
1489/// \defgroup AArch64CCMP CMP;CCMP matching
1490///
1491/// These functions deal with the formation of CMP;CCMP;... sequences.
1492/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1493/// a comparison. They set the NZCV flags to a predefined value if their
1494/// predicate is false. This allows arbitrary conjunctions to be expressed; for
1495/// example, "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1496/// is expressed as:
1497/// cmp A
1498/// ccmp B, inv(CB), CA
1499/// check for CB flags
1500///
1501/// In general we can create code for arbitrary "... (and (and A B) C)"
1502/// sequences. We can also implement some "or" expressions, because "(or A B)"
1503/// is equivalent to "not (and (not A) (not B))" and we can implement some
1504/// negation operations:
1505/// We can negate the results of a single comparison by inverting the flags
1506/// used when the predicate fails and inverting the flags tested in the next
1507/// instruction; We can also negate the results of the whole previous
1508/// conditional compare sequence by inverting the flags tested in the next
1509/// instruction. However there is no way to negate the result of a partial
1510/// sequence.
1511///
1512/// Therefore on encountering an "or" expression we can negate the subtree on
1513/// one side and have to be able to push the negate to the leaves of the subtree
1514/// on the other side (see also the comments in code). As complete example:
1515/// "or (or (setCA (cmp A)) (setCB (cmp B)))
1516/// (and (setCC (cmp C)) (setCD (cmp D)))"
1517/// is transformed to
1518/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1519/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1520/// and implemented as:
1521/// cmp C
1522/// ccmp D, inv(CD), CC
1523/// ccmp A, CA, inv(CD)
1524/// ccmp B, CB, inv(CA)
1525/// check for CB flags
1526/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1527/// by conditional compare sequences.
1528/// @{
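
As a hedged aside (not LLVM code), the De Morgan identity that the whole @ref AArch64CCMP scheme leans on when it turns "or" trees into negated "and" trees, checked exhaustively over booleans:

#include <cassert>

int main() {
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B)
      // (or A B) == not (and (not A) (not B))
      assert((A || B) == !(!A && !B));
}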
1529
1530/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1531static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1532 ISD::CondCode CC, SDValue CCOp,
1533 AArch64CC::CondCode Predicate,
1534 AArch64CC::CondCode OutCC,
1535 const SDLoc &DL, SelectionDAG &DAG) {
1536 unsigned Opcode = 0;
1537 const bool FullFP16 =
1538 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1539
1540 if (LHS.getValueType().isFloatingPoint()) {
1541    assert(LHS.getValueType() != MVT::f128);
1542 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1543 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1544 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1545 }
1546 Opcode = AArch64ISD::FCCMP;
1547 } else if (RHS.getOpcode() == ISD::SUB) {
1548 SDValue SubOp0 = RHS.getOperand(0);
1549 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1550 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1551 Opcode = AArch64ISD::CCMN;
1552 RHS = RHS.getOperand(1);
1553 }
1554 }
1555 if (Opcode == 0)
1556 Opcode = AArch64ISD::CCMP;
1557
1558 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1559 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1560 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1561 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1562 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1563}
1564
1565/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1566/// CanNegate is set to true if we can push a negate operation through
1567/// the tree in a way that we are left with AND operations and negate operations
1568/// at the leaves only, i.e. "not (or (or x y) z)" can be changed to
1569/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1570/// brought into such a form.
1571static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
1572 unsigned Depth = 0) {
1573 if (!Val.hasOneUse())
1574 return false;
1575 unsigned Opcode = Val->getOpcode();
1576 if (Opcode == ISD::SETCC) {
1577 if (Val->getOperand(0).getValueType() == MVT::f128)
1578 return false;
1579 CanNegate = true;
1580 return true;
1581 }
1582 // Protect against exponential runtime and stack overflow.
1583 if (Depth > 6)
1584 return false;
1585 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1586 SDValue O0 = Val->getOperand(0);
1587 SDValue O1 = Val->getOperand(1);
1588 bool CanNegateL;
1589 if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1))
1590 return false;
1591 bool CanNegateR;
1592 if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1))
1593 return false;
1594
1595 if (Opcode == ISD::OR) {
1596 // For an OR expression we need to be able to negate at least one side or
1597 // we cannot do the transformation at all.
1598 if (!CanNegateL && !CanNegateR)
1599 return false;
1600 // We can however change a (not (or x y)) to (and (not x) (not y)) if we
1601 // can negate the x and y subtrees.
1602 CanNegate = CanNegateL && CanNegateR;
1603 } else {
1604      // If the operands are OR expressions then we finally need to negate their
1605      // outputs; we can only do that for the operand emitted last, by
1606      // negating OutCC, not for both operands.
1607 bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1608 bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1609 if (NeedsNegOutL && NeedsNegOutR)
1610 return false;
1611      // We cannot negate an AND operation (it would become an OR).
1612 CanNegate = false;
1613 }
1614 return true;
1615 }
1616 return false;
1617}
1618
1619/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1620/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
1621/// Tries to transform the given i1 producing node @p Val to a series of compare
1622/// and conditional compare operations. @returns an NZCV flags producing node
1623/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
1624/// the transformation was not possible.
1625/// On recursive invocations @p Negate may be set to true to have negation
1626/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
1627/// for the comparisons in the current subtree; @p CCOp is the flag-producing
1628/// node of the previously emitted comparison, if any.
1629static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val,
1630 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1631 AArch64CC::CondCode Predicate) {
1632 // We're at a tree leaf, produce a conditional comparison operation.
1633 unsigned Opcode = Val->getOpcode();
1634 if (Opcode == ISD::SETCC) {
1635 SDValue LHS = Val->getOperand(0);
1636 SDValue RHS = Val->getOperand(1);
1637 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1638 bool isInteger = LHS.getValueType().isInteger();
1639 if (Negate)
1640 CC = getSetCCInverse(CC, isInteger);
1641 SDLoc DL(Val);
1642 // Determine OutCC and handle FP special case.
1643 if (isInteger) {
1644 OutCC = changeIntCCToAArch64CC(CC);
1645 } else {
1646      assert(LHS.getValueType().isFloatingPoint());
1647 AArch64CC::CondCode ExtraCC;
1648 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1649 // Some floating point conditions can't be tested with a single condition
1650 // code. Construct an additional comparison in this case.
1651 if (ExtraCC != AArch64CC::AL) {
1652 SDValue ExtraCmp;
1653 if (!CCOp.getNode())
1654 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1655 else
1656 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1657 ExtraCC, DL, DAG);
1658 CCOp = ExtraCmp;
1659 Predicate = ExtraCC;
1660 }
1661 }
1662
1663 // Produce a normal comparison if we are first in the chain
1664 if (!CCOp)
1665 return emitComparison(LHS, RHS, CC, DL, DAG);
1666 // Otherwise produce a ccmp.
1667 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1668 DAG);
1669 }
1670  assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1671         "Valid conjunction/disjunction tree");
1672
1673 // Check if both sides can be transformed.
1674 SDValue LHS = Val->getOperand(0);
1675 SDValue RHS = Val->getOperand(1);
1676
1677 // In case of an OR we need to negate our operands and the result.
1678 // (A v B) <=> not(not(A) ^ not(B))
1679 bool NegateOpsAndResult = Opcode == ISD::OR;
1680 // We can negate the results of all previous operations by inverting the
1681 // predicate flags giving us a free negation for one side. The other side
1682 // must be negatable by itself.
1683 if (NegateOpsAndResult) {
1684 // See which side we can negate.
1685 bool CanNegateL;
1686 bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
1687    assert(isValidL && "Valid conjunction/disjunction tree");
1688 (void)isValidL;
1689
1690#ifndef NDEBUG
1691 bool CanNegateR;
1692 bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
1693    assert(isValidR && "Valid conjunction/disjunction tree");
1694    assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1695#endif
1696
1697 // Order the side which we cannot negate to RHS so we can emit it first.
1698 if (!CanNegateL)
1699 std::swap(LHS, RHS);
1700 } else {
1701 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1702    assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1703           "Valid conjunction/disjunction tree");
1704 // Order the side where we need to negate the output flags to RHS so it
1705 // gets emitted first.
1706 if (NeedsNegOutL)
1707 std::swap(LHS, RHS);
1708 }
1709
1710 // Emit RHS. If we want to negate the tree we only need to push a negate
1711 // through if we are already in a PushNegate case, otherwise we can negate
1712 // the "flags to test" afterwards.
1713 AArch64CC::CondCode RHSCC;
1714 SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
1715 CCOp, Predicate);
1716 if (NegateOpsAndResult && !Negate)
1717 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1718 // Emit LHS. We may need to negate it.
1719 SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
1720 NegateOpsAndResult, CmpR,
1721 RHSCC);
1722  // If we transformed an OR to an AND then we have to negate the result
1723 // (or absorb the Negate parameter).
1724 if (NegateOpsAndResult && !Negate)
1725 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1726 return CmpL;
1727}
1728
1729/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1730/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
1731/// \see emitConjunctionDisjunctionTreeRec().
1732static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
1733 AArch64CC::CondCode &OutCC) {
1734 bool CanNegate;
1735 if (!isConjunctionDisjunctionTree(Val, CanNegate))
1736 return SDValue();
1737
1738 return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(),
1739 AArch64CC::AL);
1740}
1741
1742/// @}
1743
1744static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1745 SDValue &AArch64cc, SelectionDAG &DAG,
1746 const SDLoc &dl) {
1747 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1748 EVT VT = RHS.getValueType();
1749 uint64_t C = RHSC->getZExtValue();
1750 if (!isLegalArithImmed(C)) {
1751 // Constant does not fit, try adjusting it by one?
1752 switch (CC) {
1753 default:
1754 break;
1755 case ISD::SETLT:
1756 case ISD::SETGE:
1757 if ((VT == MVT::i32 && C != 0x80000000 &&
1758 isLegalArithImmed((uint32_t)(C - 1))) ||
1759 (VT == MVT::i64 && C != 0x80000000ULL &&
1760 isLegalArithImmed(C - 1ULL))) {
1761 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1762 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1763 RHS = DAG.getConstant(C, dl, VT);
1764 }
1765 break;
1766 case ISD::SETULT:
1767 case ISD::SETUGE:
1768 if ((VT == MVT::i32 && C != 0 &&
1769 isLegalArithImmed((uint32_t)(C - 1))) ||
1770 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1771 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1772 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1773 RHS = DAG.getConstant(C, dl, VT);
1774 }
1775 break;
1776 case ISD::SETLE:
1777 case ISD::SETGT:
1778      if ((VT == MVT::i32 && C != INT32_MAX &&
1779 isLegalArithImmed((uint32_t)(C + 1))) ||
1780          (VT == MVT::i64 && C != INT64_MAX &&
1781 isLegalArithImmed(C + 1ULL))) {
1782 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1783 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1784 RHS = DAG.getConstant(C, dl, VT);
1785 }
1786 break;
1787 case ISD::SETULE:
1788 case ISD::SETUGT:
1789      if ((VT == MVT::i32 && C != UINT32_MAX &&
1790 isLegalArithImmed((uint32_t)(C + 1))) ||
1791          (VT == MVT::i64 && C != UINT64_MAX &&
1792 isLegalArithImmed(C + 1ULL))) {
1793 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1794 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1795 RHS = DAG.getConstant(C, dl, VT);
1796 }
1797 break;
1798 }
1799 }
1800 }
1801 SDValue Cmp;
1802 AArch64CC::CondCode AArch64CC;
1803 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1804 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1805
1806 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1807 // For the i8 operand, the largest immediate is 255, so this can be easily
1808 // encoded in the compare instruction. For the i16 operand, however, the
1809 // largest immediate cannot be encoded in the compare.
1810 // Therefore, use a sign extending load and cmn to avoid materializing the
1811 // -1 constant. For example,
1812 // movz w1, #65535
1813 // ldrh w0, [x0, #0]
1814 // cmp w0, w1
1815 // >
1816 // ldrsh w0, [x0, #0]
1817 // cmn w0, #1
1818    // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1819 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1820 // ensure both the LHS and RHS are truly zero extended and to make sure the
1821 // transformation is profitable.
1822 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1823 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1824 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1825 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1826 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1827 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1828 SDValue SExt =
1829 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1830 DAG.getValueType(MVT::i16));
1831 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1832 RHS.getValueType()),
1833 CC, dl, DAG);
1834 AArch64CC = changeIntCCToAArch64CC(CC);
1835 }
1836 }
1837
1838 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1839 if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1840 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1841 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1842 }
1843 }
1844 }
1845
1846 if (!Cmp) {
1847 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1848 AArch64CC = changeIntCCToAArch64CC(CC);
1849 }
1850 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1851 return Cmp;
1852}
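
A hedged standalone sketch (not LLVM code) of the adjust-by-one trick used above when the right-hand constant is not encodable: 4097, for example, is not a legal arithmetic immediate but 4096 is, so "x < 4097" can be compared as "x <= 4096".

#include <cassert>
#include <cstdint>

int main() {
  const int64_t C = 4097;  // not encodable: neither a 12-bit value nor a shifted one
  for (int64_t x : {4095LL, 4096LL, 4097LL, 4098LL}) {
    assert((x <  C) == (x <= C - 1));  // SETLT rewritten as SETLE with C-1
    assert((x >= C) == (x >  C - 1));  // SETGE rewritten as SETGT with C-1
  }
}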
1853
1854static std::pair<SDValue, SDValue>
1855getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1856  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1857         "Unsupported value type");
1858 SDValue Value, Overflow;
1859 SDLoc DL(Op);
1860 SDValue LHS = Op.getOperand(0);
1861 SDValue RHS = Op.getOperand(1);
1862 unsigned Opc = 0;
1863 switch (Op.getOpcode()) {
1864 default:
1865    llvm_unreachable("Unknown overflow instruction!");
1866 case ISD::SADDO:
1867 Opc = AArch64ISD::ADDS;
1868 CC = AArch64CC::VS;
1869 break;
1870 case ISD::UADDO:
1871 Opc = AArch64ISD::ADDS;
1872 CC = AArch64CC::HS;
1873 break;
1874 case ISD::SSUBO:
1875 Opc = AArch64ISD::SUBS;
1876 CC = AArch64CC::VS;
1877 break;
1878 case ISD::USUBO:
1879 Opc = AArch64ISD::SUBS;
1880 CC = AArch64CC::LO;
1881 break;
1882 // Multiply needs a little bit extra work.
1883 case ISD::SMULO:
1884 case ISD::UMULO: {
1885 CC = AArch64CC::NE;
1886 bool IsSigned = Op.getOpcode() == ISD::SMULO;
1887 if (Op.getValueType() == MVT::i32) {
1888 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1889 // For a 32 bit multiply with overflow check we want the instruction
1890 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1891 // need to generate the following pattern:
1892 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1893 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1894 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1895 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1896 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1897 DAG.getConstant(0, DL, MVT::i64));
1898 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1899 // operation. We need to clear out the upper 32 bits, because we used a
1900 // widening multiply that wrote all 64 bits. In the end this should be a
1901 // noop.
1902 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1903 if (IsSigned) {
1904        // The signed overflow check requires more than just a simple check for
1905        // any bit set in the upper 32 bits of the result. These bits could be
1906        // just the sign bits of a negative number. To perform the overflow
1907        // check we arithmetically shift the low 32 bits of the result right by
1908        // 31 bits (replicating the sign bit) and compare that to the upper 32 bits.
1909 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1910 DAG.getConstant(32, DL, MVT::i64));
1911 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1912 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1913 DAG.getConstant(31, DL, MVT::i64));
1914 // It is important that LowerBits is last, otherwise the arithmetic
1915 // shift will not be folded into the compare (SUBS).
1916 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1917 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1918 .getValue(1);
1919 } else {
1920 // The overflow check for unsigned multiply is easy. We only need to
1921 // check if any of the upper 32 bits are set. This can be done with a
1922 // CMP (shifted register). For that we need to generate the following
1923 // pattern:
1924 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
1925 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1926 DAG.getConstant(32, DL, MVT::i64));
1927 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1928 Overflow =
1929 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1930 DAG.getConstant(0, DL, MVT::i64),
1931 UpperBits).getValue(1);
1932 }
1933 break;
1934 }
1935    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1936 // For the 64 bit multiply
1937 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1938 if (IsSigned) {
1939 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1940 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1941 DAG.getConstant(63, DL, MVT::i64));
1942 // It is important that LowerBits is last, otherwise the arithmetic
1943 // shift will not be folded into the compare (SUBS).
1944 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1945 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1946 .getValue(1);
1947 } else {
1948 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1949 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1950 Overflow =
1951 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1952 DAG.getConstant(0, DL, MVT::i64),
1953 UpperBits).getValue(1);
1954 }
1955 break;
1956 }
1957 } // switch (...)
1958
1959 if (Opc) {
1960 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1961
1962 // Emit the AArch64 operation with overflow check.
1963 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1964 Overflow = Value.getValue(1);
1965 }
1966 return std::make_pair(Value, Overflow);
1967}
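
A hedged standalone sketch (not LLVM code) of the 32-bit multiply-overflow checks the block above lowers: widen to 64 bits, then inspect the upper half.

#include <cassert>
#include <cstdint>

// Unsigned: overflow iff any of the upper 32 bits of the widened product is set.
static bool umul32Overflows(uint32_t a, uint32_t b) {
  uint64_t wide = (uint64_t)a * b;
  return (wide >> 32) != 0;
}

// Signed: overflow unless the upper 32 bits are just the sign-extension of the
// low half, which is what the SRA-by-31 comparison above is testing.
static bool smul32Overflows(int32_t a, int32_t b) {
  int64_t wide = (int64_t)a * b;
  return wide != (int64_t)(int32_t)wide;  // product does not fit in 32 signed bits
}

int main() {
  assert(!umul32Overflows(65535u, 65537u));  // 0xFFFFFFFF still fits
  assert( umul32Overflows(65536u, 65536u));  // 2^32 does not
  assert(!smul32Overflows(-46341, 46340));   // within int32_t range
  assert( smul32Overflows(46341, 46341));    // 2147488281 > INT32_MAX
}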
1968
1969SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1970 RTLIB::Libcall Call) const {
1971 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1972 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
1973}
1974
1975// Returns true if the given Op is the overflow flag result of an overflow
1976// intrinsic operation.
1977static bool isOverflowIntrOpRes(SDValue Op) {
1978 unsigned Opc = Op.getOpcode();
1979 return (Op.getResNo() == 1 &&
1980 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
1981 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
1982}
1983
1984static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
1985 SDValue Sel = Op.getOperand(0);
1986 SDValue Other = Op.getOperand(1);
1987 SDLoc dl(Sel);
1988
1989 // If the operand is an overflow checking operation, invert the condition
1990 // code and kill the Not operation. I.e., transform:
1991 // (xor (overflow_op_bool, 1))
1992 // -->
1993 // (csel 1, 0, invert(cc), overflow_op_bool)
1994 // ... which later gets transformed to just a cset instruction with an
1995 // inverted condition code, rather than a cset + eor sequence.
1996 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
1997 // Only lower legal XALUO ops.
1998 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
1999 return SDValue();
2000
2001 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2002 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2003 AArch64CC::CondCode CC;
2004 SDValue Value, Overflow;
2005 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2006 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2007 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2008 CCVal, Overflow);
2009 }
2010 // If neither operand is a SELECT_CC, give up.
2011 if (Sel.getOpcode() != ISD::SELECT_CC)
2012 std::swap(Sel, Other);
2013 if (Sel.getOpcode() != ISD::SELECT_CC)
2014 return Op;
2015
2016 // The folding we want to perform is:
2017 // (xor x, (select_cc a, b, cc, 0, -1) )
2018 // -->
2019 // (csel x, (xor x, -1), cc ...)
2020 //
2021 // The latter will get matched to a CSINV instruction.
2022
2023 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2024 SDValue LHS = Sel.getOperand(0);
2025 SDValue RHS = Sel.getOperand(1);
2026 SDValue TVal = Sel.getOperand(2);
2027 SDValue FVal = Sel.getOperand(3);
2028
2029 // FIXME: This could be generalized to non-integer comparisons.
2030 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2031 return Op;
2032
2033 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2034 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2035
2036 // The values aren't constants, this isn't the pattern we're looking for.
2037 if (!CFVal || !CTVal)
2038 return Op;
2039
2040 // We can commute the SELECT_CC by inverting the condition. This
2041 // might be needed to make this fit into a CSINV pattern.
2042 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2043 std::swap(TVal, FVal);
2044 std::swap(CTVal, CFVal);
2045 CC = ISD::getSetCCInverse(CC, true);
2046 }
2047
2048 // If the constants line up, perform the transform!
2049 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2050 SDValue CCVal;
2051 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2052
2053 FVal = Other;
2054 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2055 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2056
2057 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2058 CCVal, Cmp);
2059 }
2060
2061 return Op;
2062}
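
A hedged standalone check (not LLVM code) of the fold described above: xor-ing with a 0/-1 select is the same as selecting between the value and its bitwise complement, which is what CSINV provides.

#include <cassert>
#include <cstdint>

int main() {
  for (uint64_t x : {0ULL, 1ULL, 0xdeadbeefULL})
    for (bool cc : {false, true}) {
      uint64_t original = x ^ (cc ? 0ULL : ~0ULL);  // (xor x, (select_cc ..., 0, -1))
      uint64_t folded   = cc ? x : (x ^ ~0ULL);     // (csel x, (xor x, -1), cc, ...)
      assert(original == folded);
    }
}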
2063
2064static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2065 EVT VT = Op.getValueType();
2066
2067 // Let legalize expand this if it isn't a legal type yet.
2068 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2069 return SDValue();
2070
2071 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2072
2073 unsigned Opc;
2074 bool ExtraOp = false;
2075 switch (Op.getOpcode()) {
2076 default:
2077    llvm_unreachable("Invalid code");
2078 case ISD::ADDC:
2079 Opc = AArch64ISD::ADDS;
2080 break;
2081 case ISD::SUBC:
2082 Opc = AArch64ISD::SUBS;
2083 break;
2084 case ISD::ADDE:
2085 Opc = AArch64ISD::ADCS;
2086 ExtraOp = true;
2087 break;
2088 case ISD::SUBE:
2089 Opc = AArch64ISD::SBCS;
2090 ExtraOp = true;
2091 break;
2092 }
2093
2094 if (!ExtraOp)
2095 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2096 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2097 Op.getOperand(2));
2098}
2099
2100static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2101 // Let legalize expand this if it isn't a legal type yet.
2102 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2103 return SDValue();
2104
2105 SDLoc dl(Op);
2106 AArch64CC::CondCode CC;
2107 // The actual operation that sets the overflow or carry flag.
2108 SDValue Value, Overflow;
2109 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2110
2111 // We use 0 and 1 as false and true values.
2112 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2113 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2114
2115 // We use an inverted condition, because the conditional select is inverted
2116 // too. This will allow it to be selected to a single instruction:
2117 // CSINC Wd, WZR, WZR, invert(cond).
2118 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2119 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2120 CCVal, Overflow);
2121
2122 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2123 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2124}
2125
2126// Prefetch operands are:
2127// 1: Address to prefetch
2128// 2: bool isWrite
2129// 3: int locality (0 = no locality ... 3 = extreme locality)
2130// 4: bool isDataCache
2131static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2132 SDLoc DL(Op);
2133 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2134 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2135 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2136
2137 bool IsStream = !Locality;
2138 // When the locality number is set
2139 if (Locality) {
2140 // The front-end should have filtered out the out-of-range values
2141    assert(Locality <= 3 && "Prefetch locality out-of-range");
2142 // The locality degree is the opposite of the cache speed.
2143 // Put the number the other way around.
2144 // The encoding starts at 0 for level 1
2145 Locality = 3 - Locality;
2146 }
2147
2148  // Build the mask value encoding the expected behavior.
2149 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2150 (!IsData << 3) | // IsDataCache bit
2151 (Locality << 1) | // Cache level bits
2152 (unsigned)IsStream; // Stream bit
2153 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2154 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2155}
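
A hedged standalone recomputation (not LLVM code) of the prfop bit layout assembled above, for one sample input: a plain data-cache read prefetch with maximum locality.

#include <cassert>

int main() {
  unsigned IsWrite = 0, Locality = 3, IsData = 1;  // e.g. llvm.prefetch(ptr, /*rw=*/0, /*locality=*/3, /*data=*/1)
  bool IsStream = (Locality == 0);                 // locality 0 -> streaming (non-temporal)
  unsigned Level = Locality ? 3 - Locality : 0;    // locality 3 -> target-level encoding 0 (L1)
  unsigned PrfOp = (IsWrite << 4) |                // load/store bit
                   ((IsData ? 0u : 1u) << 3) |     // instruction-vs-data cache bit
                   (Level << 1) |                  // cache level bits
                   (unsigned)IsStream;             // stream bit
  assert(PrfOp == 0);                              // load, data cache, L1, keep policy
}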
2156
2157SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2158 SelectionDAG &DAG) const {
2159  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2160
2161 RTLIB::Libcall LC;
2162 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2163
2164 return LowerF128Call(Op, DAG, LC);
2165}
2166
2167SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2168 SelectionDAG &DAG) const {
2169 if (Op.getOperand(0).getValueType() != MVT::f128) {
2170 // It's legal except when f128 is involved
2171 return Op;
2172 }
2173
2174 RTLIB::Libcall LC;
2175 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2176
2177 // FP_ROUND node has a second operand indicating whether it is known to be
2178 // precise. That doesn't take part in the LibCall so we can't directly use
2179 // LowerF128Call.
2180 SDValue SrcVal = Op.getOperand(0);
2181 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2182 SDLoc(Op)).first;
2183}
2184
2185static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2186 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2187 // Any additional optimization in this function should be recorded
2188 // in the cost tables.
2189 EVT InVT = Op.getOperand(0).getValueType();
2190 EVT VT = Op.getValueType();
2191 unsigned NumElts = InVT.getVectorNumElements();
2192
2193 // f16 vectors are promoted to f32 before a conversion.
2194 if (InVT.getVectorElementType() == MVT::f16) {
2195 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2196 SDLoc dl(Op);
2197 return DAG.getNode(
2198 Op.getOpcode(), dl, Op.getValueType(),
2199 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2200 }
2201
2202 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2203 SDLoc dl(Op);
2204 SDValue Cv =
2205 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2206 Op.getOperand(0));
2207 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2208 }
2209
2210 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2211 SDLoc dl(Op);
2212 MVT ExtVT =
2213 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2214 VT.getVectorNumElements());
2215 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2216 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2217 }
2218
2219 // Type changing conversions are illegal.
2220 return Op;
2221}
2222
2223SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2224 SelectionDAG &DAG) const {
2225 if (Op.getOperand(0).getValueType().isVector())
2226 return LowerVectorFP_TO_INT(Op, DAG);
2227
2228 // f16 conversions are promoted to f32 when full fp16 is not supported.
2229 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2230 !Subtarget->hasFullFP16()) {
2231 SDLoc dl(Op);
2232 return DAG.getNode(
2233 Op.getOpcode(), dl, Op.getValueType(),
2234 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2235 }
2236
2237 if (Op.getOperand(0).getValueType() != MVT::f128) {
2238 // It's legal except when f128 is involved
2239 return Op;
2240 }
2241
2242 RTLIB::Libcall LC;
2243 if (Op.getOpcode() == ISD::FP_TO_SINT)
2244 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2245 else
2246 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2247
2248 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2249 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2250}
2251
2252static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2253 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2254 // Any additional optimization in this function should be recorded
2255 // in the cost tables.
2256 EVT VT = Op.getValueType();
2257 SDLoc dl(Op);
2258 SDValue In = Op.getOperand(0);
2259 EVT InVT = In.getValueType();
2260
2261 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2262 MVT CastVT =
2263 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2264 InVT.getVectorNumElements());
2265 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2266 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2267 }
2268
2269 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2270 unsigned CastOpc =
2271 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2272 EVT CastVT = VT.changeVectorElementTypeToInteger();
2273 In = DAG.getNode(CastOpc, dl, CastVT, In);
2274 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2275 }
2276
2277 return Op;
2278}
2279
2280SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2281 SelectionDAG &DAG) const {
2282 if (Op.getValueType().isVector())
2283 return LowerVectorINT_TO_FP(Op, DAG);
2284
2285 // f16 conversions are promoted to f32 when full fp16 is not supported.
2286 if (Op.getValueType() == MVT::f16 &&
2287 !Subtarget->hasFullFP16()) {
2288 SDLoc dl(Op);
2289 return DAG.getNode(
2290 ISD::FP_ROUND, dl, MVT::f16,
2291 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2292 DAG.getIntPtrConstant(0, dl));
2293 }
2294
2295 // i128 conversions are libcalls.
2296 if (Op.getOperand(0).getValueType() == MVT::i128)
2297 return SDValue();
2298
2299 // Other conversions are legal, unless it's to the completely software-based
2300 // fp128.
2301 if (Op.getValueType() != MVT::f128)
2302 return Op;
2303
2304 RTLIB::Libcall LC;
2305 if (Op.getOpcode() == ISD::SINT_TO_FP)
2306 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2307 else
2308 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2309
2310 return LowerF128Call(Op, DAG, LC);
2311}
2312
2313SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2314 SelectionDAG &DAG) const {
2315 // For iOS, we want to call an alternative entry point: __sincos_stret,
2316 // which returns the values in two S / D registers.
2317 SDLoc dl(Op);
2318 SDValue Arg = Op.getOperand(0);
2319 EVT ArgVT = Arg.getValueType();
2320 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2321
2322 ArgListTy Args;
2323 ArgListEntry Entry;
2324
2325 Entry.Node = Arg;
2326 Entry.Ty = ArgTy;
2327 Entry.IsSExt = false;
2328 Entry.IsZExt = false;
2329 Args.push_back(Entry);
2330
2331 const char *LibcallName =
2332 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
2333 SDValue Callee =
2334 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2335
2336 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2337 TargetLowering::CallLoweringInfo CLI(DAG);
2338 CLI.setDebugLoc(dl)
2339 .setChain(DAG.getEntryNode())
2340 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2341
2342 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2343 return CallResult.first;
2344}
2345
2346static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2347 if (Op.getValueType() != MVT::f16)
2348 return SDValue();
2349
2350  assert(Op.getOperand(0).getValueType() == MVT::i16);
2351 SDLoc DL(Op);
2352
2353 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2354 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2355 return SDValue(
2356 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2357 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2358 0);
2359}
2360
2361static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2362 if (OrigVT.getSizeInBits() >= 64)
2363 return OrigVT;
2364
2365  assert(OrigVT.isSimple() && "Expecting a simple value type");
2366
2367 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2368 switch (OrigSimpleTy) {
2369  default: llvm_unreachable("Unexpected Vector Type");
2370 case MVT::v2i8:
2371 case MVT::v2i16:
2372 return MVT::v2i32;
2373 case MVT::v4i8:
2374 return MVT::v4i16;
2375 }
2376}
2377
2378static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2379 const EVT &OrigTy,
2380 const EVT &ExtTy,
2381 unsigned ExtOpcode) {
2382 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2383 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2384 // 64-bits we need to insert a new extension so that it will be 64-bits.
2385  assert(ExtTy.is128BitVector() && "Unexpected extension size");
2386 if (OrigTy.getSizeInBits() >= 64)
2387 return N;
2388
2389 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2390 EVT NewVT = getExtensionTo64Bits(OrigTy);
2391
2392 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2393}
2394
2395static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2396 bool isSigned) {
2397 EVT VT = N->getValueType(0);
2398
2399 if (N->getOpcode() != ISD::BUILD_VECTOR)
2400 return false;
2401
2402 for (const SDValue &Elt : N->op_values()) {
2403 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2404 unsigned EltSize = VT.getScalarSizeInBits();
2405 unsigned HalfSize = EltSize / 2;
2406 if (isSigned) {
2407 if (!isIntN(HalfSize, C->getSExtValue()))
2408 return false;
2409 } else {
2410 if (!isUIntN(HalfSize, C->getZExtValue()))
2411 return false;
2412 }
2413 continue;
2414 }
2415 return false;
2416 }
2417
2418 return true;
2419}
2420
2421static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2422 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2423 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2424 N->getOperand(0)->getValueType(0),
2425 N->getValueType(0),
2426 N->getOpcode());
2427
2428  assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2429 EVT VT = N->getValueType(0);
2430 SDLoc dl(N);
2431 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2432 unsigned NumElts = VT.getVectorNumElements();
2433 MVT TruncVT = MVT::getIntegerVT(EltSize);
2434 SmallVector<SDValue, 8> Ops;
2435 for (unsigned i = 0; i != NumElts; ++i) {
2436 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2437 const APInt &CInt = C->getAPIntValue();
2438 // Element types smaller than 32 bits are not legal, so use i32 elements.
2439 // The values are implicitly truncated so sext vs. zext doesn't matter.
2440 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2441 }
2442 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2443}
2444
2445static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2446 return N->getOpcode() == ISD::SIGN_EXTEND ||
2447 isExtendedBUILD_VECTOR(N, DAG, true);
2448}
2449
2450static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2451 return N->getOpcode() == ISD::ZERO_EXTEND ||
2452 isExtendedBUILD_VECTOR(N, DAG, false);
2453}
2454
2455static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2456 unsigned Opcode = N->getOpcode();
2457 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2458 SDNode *N0 = N->getOperand(0).getNode();
2459 SDNode *N1 = N->getOperand(1).getNode();
2460 return N0->hasOneUse() && N1->hasOneUse() &&
2461 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2462 }
2463 return false;
2464}
2465
2466static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2467 unsigned Opcode = N->getOpcode();
2468 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2469 SDNode *N0 = N->getOperand(0).getNode();
2470 SDNode *N1 = N->getOperand(1).getNode();
2471 return N0->hasOneUse() && N1->hasOneUse() &&
2472 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2473 }
2474 return false;
2475}
2476
2477static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2478 // Multiplications are only custom-lowered for 128-bit vectors so that
2479 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2480 EVT VT = Op.getValueType();
2481  assert(VT.is128BitVector() && VT.isInteger() &&
2482         "unexpected type for custom-lowering ISD::MUL");
2483 SDNode *N0 = Op.getOperand(0).getNode();
2484 SDNode *N1 = Op.getOperand(1).getNode();
2485 unsigned NewOpc = 0;
2486 bool isMLA = false;
2487 bool isN0SExt = isSignExtended(N0, DAG);
2488 bool isN1SExt = isSignExtended(N1, DAG);
2489 if (isN0SExt && isN1SExt)
2490 NewOpc = AArch64ISD::SMULL;
2491 else {
2492 bool isN0ZExt = isZeroExtended(N0, DAG);
2493 bool isN1ZExt = isZeroExtended(N1, DAG);
2494 if (isN0ZExt && isN1ZExt)
2495 NewOpc = AArch64ISD::UMULL;
2496 else if (isN1SExt || isN1ZExt) {
2497 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2498 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2499 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2500 NewOpc = AArch64ISD::SMULL;
2501 isMLA = true;
2502 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2503 NewOpc = AArch64ISD::UMULL;
2504 isMLA = true;
2505 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2506 std::swap(N0, N1);
2507 NewOpc = AArch64ISD::UMULL;
2508 isMLA = true;
2509 }
2510 }
2511
2512 if (!NewOpc) {
2513 if (VT == MVT::v2i64)
2514 // Fall through to expand this. It is not legal.
2515 return SDValue();
2516 else
2517 // Other vector multiplications are legal.
2518 return Op;
2519 }
2520 }
2521
2522 // Legalize to a S/UMULL instruction
2523 SDLoc DL(Op);
2524 SDValue Op0;
2525 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2526 if (!isMLA) {
2527 Op0 = skipExtensionForVectorMULL(N0, DAG);
2528    assert(Op0.getValueType().is64BitVector() &&
2529           Op1.getValueType().is64BitVector() &&
2530           "unexpected types for extended operands to VMULL");
2531 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2532 }
2533 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2534 // isel lowering to take advantage of no-stall back-to-back s/umull + s/umlal.
2535 // This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
2536 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2537 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2538 EVT Op1VT = Op1.getValueType();
2539 return DAG.getNode(N0->getOpcode(), DL, VT,
2540 DAG.getNode(NewOpc, DL, VT,
2541 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2542 DAG.getNode(NewOpc, DL, VT,
2543 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2544}
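
The MLA path above relies on multiplication distributing over addition once both operands are widened. A minimal standalone sketch of that identity on scalars (plain C++, not the LLVM API; uint32_t stands in for the narrow vector lanes):

#include <cassert>
#include <cstdint>

// Two widening multiplies plus an accumulate, mirroring the back-to-back
// umull/umlal sequence described in the comment above.
static uint64_t mulDistributed(uint32_t A, uint32_t B, uint32_t C) {
  uint64_t P0 = (uint64_t)A * C;  // zext A * zext C
  uint64_t P1 = (uint64_t)B * C;  // zext B * zext C
  return P0 + P1;
}

int main() {
  uint32_t A = 0x12345678u, B = 0x90ABCDEFu, C = 0x1001u;
  // Same result (mod 2^64) as widening the sum first: (zext A + zext B) * zext C.
  assert(mulDistributed(A, B, C) == ((uint64_t)A + B) * C);
  return 0;
}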
2545
2546SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2547 SelectionDAG &DAG) const {
2548 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2549 SDLoc dl(Op);
2550 switch (IntNo) {
2551 default: return SDValue(); // Don't custom lower most intrinsics.
2552 case Intrinsic::thread_pointer: {
2553 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2554 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2555 }
2556 case Intrinsic::aarch64_neon_abs:
2557 return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
2558 Op.getOperand(1));
2559 case Intrinsic::aarch64_neon_smax:
2560 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2561 Op.getOperand(1), Op.getOperand(2));
2562 case Intrinsic::aarch64_neon_umax:
2563 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2564 Op.getOperand(1), Op.getOperand(2));
2565 case Intrinsic::aarch64_neon_smin:
2566 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2567 Op.getOperand(1), Op.getOperand(2));
2568 case Intrinsic::aarch64_neon_umin:
2569 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2570 Op.getOperand(1), Op.getOperand(2));
2571 }
2572}
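
For orientation, this is the kind of source-level intrinsic that reaches this hook; a hedged sketch assuming a standard arm_neon.h toolchain targeting AArch64, where vmaxq_s32 is expected to arrive as aarch64.neon.smax and therefore be rewritten to ISD::SMAX above:

#include <arm_neon.h>

// Lane-wise signed maximum of two 128-bit vectors; expected to select a
// single "smax v0.4s, v0.4s, v1.4s" instruction.
int32x4_t maxLanes(int32x4_t a, int32x4_t b) {
  return vmaxq_s32(a, b);
}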
2573
2574SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2575 SelectionDAG &DAG) const {
2576 DEBUG(dbgs() << "Custom lowering: ");
2577 DEBUG(Op.dump());
2578
2579 switch (Op.getOpcode()) {
2580 default:
2581 llvm_unreachable("unimplemented operand");
2582 return SDValue();
2583 case ISD::BITCAST:
2584 return LowerBITCAST(Op, DAG);
2585 case ISD::GlobalAddress:
2586 return LowerGlobalAddress(Op, DAG);
2587 case ISD::GlobalTLSAddress:
2588 return LowerGlobalTLSAddress(Op, DAG);
2589 case ISD::SETCC:
2590 return LowerSETCC(Op, DAG);
2591 case ISD::BR_CC:
2592 return LowerBR_CC(Op, DAG);
2593 case ISD::SELECT:
2594 return LowerSELECT(Op, DAG);
2595 case ISD::SELECT_CC:
2596 return LowerSELECT_CC(Op, DAG);
2597 case ISD::JumpTable:
2598 return LowerJumpTable(Op, DAG);
2599 case ISD::ConstantPool:
2600 return LowerConstantPool(Op, DAG);
2601 case ISD::BlockAddress:
2602 return LowerBlockAddress(Op, DAG);
2603 case ISD::VASTART:
2604 return LowerVASTART(Op, DAG);
2605 case ISD::VACOPY:
2606 return LowerVACOPY(Op, DAG);
2607 case ISD::VAARG:
2608 return LowerVAARG(Op, DAG);
2609 case ISD::ADDC:
2610 case ISD::ADDE:
2611 case ISD::SUBC:
2612 case ISD::SUBE:
2613 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2614 case ISD::SADDO:
2615 case ISD::UADDO:
2616 case ISD::SSUBO:
2617 case ISD::USUBO:
2618 case ISD::SMULO:
2619 case ISD::UMULO:
2620 return LowerXALUO(Op, DAG);
2621 case ISD::FADD:
2622 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2623 case ISD::FSUB:
2624 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2625 case ISD::FMUL:
2626 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2627 case ISD::FDIV:
2628 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2629 case ISD::FP_ROUND:
2630 return LowerFP_ROUND(Op, DAG);
2631 case ISD::FP_EXTEND:
2632 return LowerFP_EXTEND(Op, DAG);
2633 case ISD::FRAMEADDR:
2634 return LowerFRAMEADDR(Op, DAG);
2635 case ISD::RETURNADDR:
2636 return LowerRETURNADDR(Op, DAG);
2637 case ISD::INSERT_VECTOR_ELT:
2638 return LowerINSERT_VECTOR_ELT(Op, DAG);
2639 case ISD::EXTRACT_VECTOR_ELT:
2640 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2641 case ISD::BUILD_VECTOR:
2642 return LowerBUILD_VECTOR(Op, DAG);
2643 case ISD::VECTOR_SHUFFLE:
2644 return LowerVECTOR_SHUFFLE(Op, DAG);
2645 case ISD::EXTRACT_SUBVECTOR:
2646 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2647 case ISD::SRA:
2648 case ISD::SRL:
2649 case ISD::SHL:
2650 return LowerVectorSRA_SRL_SHL(Op, DAG);
2651 case ISD::SHL_PARTS:
2652 return LowerShiftLeftParts(Op, DAG);
2653 case ISD::SRL_PARTS:
2654 case ISD::SRA_PARTS:
2655 return LowerShiftRightParts(Op, DAG);
2656 case ISD::CTPOP:
2657 return LowerCTPOP(Op, DAG);
2658 case ISD::FCOPYSIGN:
2659 return LowerFCOPYSIGN(Op, DAG);
2660 case ISD::AND:
2661 return LowerVectorAND(Op, DAG);
2662 case ISD::OR:
2663 return LowerVectorOR(Op, DAG);
2664 case ISD::XOR:
2665 return LowerXOR(Op, DAG);
2666 case ISD::PREFETCH:
2667 return LowerPREFETCH(Op, DAG);
2668 case ISD::SINT_TO_FP:
2669 case ISD::UINT_TO_FP:
2670 return LowerINT_TO_FP(Op, DAG);
2671 case ISD::FP_TO_SINT:
2672 case ISD::FP_TO_UINT:
2673 return LowerFP_TO_INT(Op, DAG);
2674 case ISD::FSINCOS:
2675 return LowerFSINCOS(Op, DAG);
2676 case ISD::MUL:
2677 return LowerMUL(Op, DAG);
2678 case ISD::INTRINSIC_WO_CHAIN:
2679 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2680 case ISD::VECREDUCE_ADD:
2681 case ISD::VECREDUCE_SMAX:
2682 case ISD::VECREDUCE_SMIN:
2683 case ISD::VECREDUCE_UMAX:
2684 case ISD::VECREDUCE_UMIN:
2685 case ISD::VECREDUCE_FMAX:
2686 case ISD::VECREDUCE_FMIN:
2687 return LowerVECREDUCE(Op, DAG);
2688 }
2689}
2690
2691//===----------------------------------------------------------------------===//
2692// Calling Convention Implementation
2693//===----------------------------------------------------------------------===//
2694
2695#include "AArch64GenCallingConv.inc"
2696
2697/// Selects the correct CCAssignFn for a given CallingConvention value.
2698CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2699 bool IsVarArg) const {
2700 switch (CC) {
2701 default:
2702 report_fatal_error("Unsupported calling convention.");
2703 case CallingConv::WebKit_JS:
2704 return CC_AArch64_WebKit_JS;
2705 case CallingConv::GHC:
2706 return CC_AArch64_GHC;
2707 case CallingConv::C:
2708 case CallingConv::Fast:
2709 case CallingConv::PreserveMost:
2710 case CallingConv::CXX_FAST_TLS:
2711 case CallingConv::Swift:
2712 if (Subtarget->isTargetWindows() && IsVarArg)
2713 return CC_AArch64_Win64_VarArg;
2714 if (!Subtarget->isTargetDarwin())
2715 return CC_AArch64_AAPCS;
2716 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2717 case CallingConv::Win64:
2718 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
2719 }
2720}
2721
2722CCAssignFn *
2723AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2724 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2725 : RetCC_AArch64_AAPCS;
2726}
2727
2728SDValue AArch64TargetLowering::LowerFormalArguments(
2729 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2730 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2731 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2732 MachineFunction &MF = DAG.getMachineFunction();
2733 MachineFrameInfo &MFI = MF.getFrameInfo();
2734 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
2735
2736 // Assign locations to all of the incoming arguments.
2737 SmallVector<CCValAssign, 16> ArgLocs;
2738 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2739 *DAG.getContext());
2740
2741 // At this point, Ins[].VT may already be promoted to i32. To correctly
2742 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2743 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2744 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2745 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2746 // LocVT.
2747 unsigned NumArgs = Ins.size();
2748 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2749 unsigned CurArgIdx = 0;
2750 for (unsigned i = 0; i != NumArgs; ++i) {
2751 MVT ValVT = Ins[i].VT;
2752 if (Ins[i].isOrigArg()) {
2753 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2754 CurArgIdx = Ins[i].getOrigArgIndex();
2755
2756 // Get type of the original argument.
2757 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2758 /*AllowUnknown*/ true);
2759 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2760 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2761 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2762 ValVT = MVT::i8;
2763 else if (ActualMVT == MVT::i16)
2764 ValVT = MVT::i16;
2765 }
2766 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2767 bool Res =
2768 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2769 assert(!Res && "Call operand has unhandled type");
2770 (void)Res;
2771 }
2772 assert(ArgLocs.size() == Ins.size());
2773 SmallVector<SDValue, 16> ArgValues;
2774 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2775 CCValAssign &VA = ArgLocs[i];
2776
2777 if (Ins[i].Flags.isByVal()) {
2778 // Byval is used for HFAs in the PCS, but the system should work in a
2779 // non-compliant manner for larger structs.
2780 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2781 int Size = Ins[i].Flags.getByValSize();
2782 unsigned NumRegs = (Size + 7) / 8;
2783
2784 // FIXME: This works on big-endian for composite byvals, which are the common
2785 // case. It should also work for fundamental types.
2786 unsigned FrameIdx =
2787 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2788 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2789 InVals.push_back(FrameIdxN);
2790
2791 continue;
2792 }
2793
2794 if (VA.isRegLoc()) {
2795 // Arguments stored in registers.
2796 EVT RegVT = VA.getLocVT();
2797
2798 SDValue ArgValue;
2799 const TargetRegisterClass *RC;
2800
2801 if (RegVT == MVT::i32)
2802 RC = &AArch64::GPR32RegClass;
2803 else if (RegVT == MVT::i64)
2804 RC = &AArch64::GPR64RegClass;
2805 else if (RegVT == MVT::f16)
2806 RC = &AArch64::FPR16RegClass;
2807 else if (RegVT == MVT::f32)
2808 RC = &AArch64::FPR32RegClass;
2809 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2810 RC = &AArch64::FPR64RegClass;
2811 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2812 RC = &AArch64::FPR128RegClass;
2813 else
2814 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2815
2816 // Transform the arguments in physical registers into virtual ones.
2817 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2818 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2819
2820 // If this is an 8, 16 or 32-bit value, it is really passed promoted
2821 // to 64 bits. Insert an assert[sz]ext to capture this, then
2822 // truncate to the right size.
2823 switch (VA.getLocInfo()) {
2824 default:
2825 llvm_unreachable("Unknown loc info!");
2826 case CCValAssign::Full:
2827 break;
2828 case CCValAssign::BCvt:
2829 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
2830 break;
2831 case CCValAssign::AExt:
2832 case CCValAssign::SExt:
2833 case CCValAssign::ZExt:
2834 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
2835 // nodes after our lowering.
2836 assert(RegVT == Ins[i].VT && "incorrect register location selected");
2837 break;
2838 }
2839
2840 InVals.push_back(ArgValue);
2841
2842 } else { // VA.isRegLoc()
2843 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
2844 unsigned ArgOffset = VA.getLocMemOffset();
2845 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
2846
2847 uint32_t BEAlign = 0;
2848 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
2849 !Ins[i].Flags.isInConsecutiveRegs())
2850 BEAlign = 8 - ArgSize;
2851
2852 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
2853
2854 // Create load nodes to retrieve arguments from the stack.
2855 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2856 SDValue ArgValue;
2857
2858 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
2859 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
2860 MVT MemVT = VA.getValVT();
2861
2862 switch (VA.getLocInfo()) {
2863 default:
2864 break;
2865 case CCValAssign::BCvt:
2866 MemVT = VA.getLocVT();
2867 break;
2868 case CCValAssign::SExt:
2869 ExtType = ISD::SEXTLOAD;
2870 break;
2871 case CCValAssign::ZExt:
2872 ExtType = ISD::ZEXTLOAD;
2873 break;
2874 case CCValAssign::AExt:
2875 ExtType = ISD::EXTLOAD;
2876 break;
2877 }
2878
2879 ArgValue = DAG.getExtLoad(
2880 ExtType, DL, VA.getLocVT(), Chain, FIN,
2881 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
2882 MemVT);
2883
2884 InVals.push_back(ArgValue);
2885 }
2886 }
2887
2888 // varargs
2889 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2890 if (isVarArg) {
2891 if (!Subtarget->isTargetDarwin() || IsWin64) {
2892 // The AAPCS variadic function ABI is identical to the non-variadic
2893 // one. As a result there may be more arguments in registers and we should
2894 // save them for future reference.
2895 // Win64 variadic functions also pass arguments in registers, but all float
2896 // arguments are passed in integer registers.
2897 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
2898 }
2899
2900 // This will point to the next argument passed via stack.
2901 unsigned StackOffset = CCInfo.getNextStackOffset();
2902 // We currently pass all varargs at 8-byte alignment.
2903 StackOffset = ((StackOffset + 7) & ~7);
2904 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
2905 }
2906
2907 unsigned StackArgSize = CCInfo.getNextStackOffset();
2908 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2909 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
2910 // This is a non-standard ABI so by fiat I say we're allowed to make full
2911 // use of the stack area to be popped, which must be aligned to 16 bytes in
2912 // any case:
2913 StackArgSize = alignTo(StackArgSize, 16);
2914
2915 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
2916 // a multiple of 16.
2917 FuncInfo->setArgumentStackToRestore(StackArgSize);
2918
2919 // This realignment carries over to the available bytes below. Our own
2920 // callers will guarantee the space is free by giving an aligned value to
2921 // CALLSEQ_START.
2922 }
2923 // Even if we're not expected to free up the space, it's useful to know how
2924 // much is there while considering tail calls (because we can reuse it).
2925 FuncInfo->setBytesInStackArgArea(StackArgSize);
2926
2927 return Chain;
2928}
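
The BEAlign adjustment used above for small stack-passed arguments can be shown in isolation; a minimal sketch (plain C++, not the LLVM API) that assumes the 8-byte stack slots described in the code and ignores the in-consecutive-registers exception:

#include <cstdio>

// On big-endian AArch64 a sub-8-byte argument occupies the high-address end of
// its 8-byte slot, so the load offset is bumped by (8 - size). Little-endian
// targets need no adjustment.
static unsigned beAlign(bool IsLittleEndian, unsigned ArgSizeBytes) {
  return (!IsLittleEndian && ArgSizeBytes < 8) ? 8 - ArgSizeBytes : 0;
}

int main() {
  std::printf("i32 arg, big-endian:    +%u bytes\n", beAlign(false, 4)); // +4
  std::printf("i32 arg, little-endian: +%u bytes\n", beAlign(true, 4));  // +0
  return 0;
}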
2929
2930void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
2931 SelectionDAG &DAG,
2932 const SDLoc &DL,
2933 SDValue &Chain) const {
2934 MachineFunction &MF = DAG.getMachineFunction();
2935 MachineFrameInfo &MFI = MF.getFrameInfo();
2936 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2937 auto PtrVT = getPointerTy(DAG.getDataLayout());
2938 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
2939
2940 SmallVector<SDValue, 8> MemOps;
2941
2942 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
2943 AArch64::X3, AArch64::X4, AArch64::X5,
2944 AArch64::X6, AArch64::X7 };
2945 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
2946 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
2947
2948 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
2949 int GPRIdx = 0;
2950 if (GPRSaveSize != 0) {
2951 if (IsWin64) {
2952 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
2953 if (GPRSaveSize & 15)
2954 // The extra size here, if triggered, will always be 8.
2955 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
2956 } else
2957 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
2958
2959 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
2960
2961 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
2962 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
2963 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
2964 SDValue Store = DAG.getStore(
2965 Val.getValue(1), DL, Val, FIN,
2966 IsWin64
2967 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
2968 GPRIdx,
2969 (i - FirstVariadicGPR) * 8)
2970 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
2971 MemOps.push_back(Store);
2972 FIN =
2973 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
2974 }
2975 }
2976 FuncInfo->setVarArgsGPRIndex(GPRIdx);
2977 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
2978
2979 if (Subtarget->hasFPARMv8() && !IsWin64) {
2980 static const MCPhysReg FPRArgRegs[] = {
2981 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
2982 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
2983 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
2984 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
2985
2986 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
2987 int FPRIdx = 0;
2988 if (FPRSaveSize != 0) {
2989 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
2990
2991 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
2992
2993 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
2994 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
2995 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
2996
2997 SDValue Store = DAG.getStore(
2998 Val.getValue(1), DL, Val, FIN,
2999 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3000 MemOps.push_back(Store);
3001 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3002 DAG.getConstant(16, DL, PtrVT));
3003 }
3004 }
3005 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3006 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3007 }
3008
3009 if (!MemOps.empty()) {
3010 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3011 }
3012}
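
A small numeric sketch of the save-area sizing performed above (plain C++, not the LLVM API), assuming the eight X and eight Q argument registers listed in the function:

#include <cstdio>

// One 8-byte slot per X register not consumed by fixed arguments, and one
// 16-byte slot per Q register not consumed by fixed arguments.
static unsigned gprSaveSize(unsigned FirstVariadicGPR) {
  return 8 * (8 - FirstVariadicGPR);
}
static unsigned fprSaveSize(unsigned FirstVariadicFPR) {
  return 16 * (8 - FirstVariadicFPR);
}

int main() {
  // e.g. printf(const char *fmt, ...): fmt occupies X0, so X1..X7 are saved.
  std::printf("GPR save area: %u bytes\n", gprSaveSize(1)); // 56
  std::printf("FPR save area: %u bytes\n", fprSaveSize(0)); // 128
  return 0;
}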
3013
3014/// LowerCallResult - Lower the result values of a call into the
3015/// appropriate copies out of appropriate physical registers.
3016SDValue AArch64TargetLowering::LowerCallResult(
3017 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3018 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3019 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3020 SDValue ThisVal) const {
3021 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3022 ? RetCC_AArch64_WebKit_JS
3023 : RetCC_AArch64_AAPCS;
3024 // Assign locations to each value returned by this call.
3025 SmallVector<CCValAssign, 16> RVLocs;
3026 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3027 *DAG.getContext());
3028 CCInfo.AnalyzeCallResult(Ins, RetCC);
3029
3030 // Copy all of the result registers out of their specified physreg.
3031 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3032 CCValAssign VA = RVLocs[i];
3033
3034 // Pass 'this' value directly from the argument to return value, to avoid
3035 // reg unit interference
3036 if (i == 0 && isThisReturn) {
3037 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3038 "unexpected return calling convention register assignment");
3039 InVals.push_back(ThisVal);
3040 continue;
3041 }
3042
3043 SDValue Val =
3044 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3045 Chain = Val.getValue(1);
3046 InFlag = Val.getValue(2);
3047
3048 switch (VA.getLocInfo()) {
3049 default:
3050 llvm_unreachable("Unknown loc info!");
3051 case CCValAssign::Full:
3052 break;
3053 case CCValAssign::BCvt:
3054 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3055 break;
3056 }
3057
3058 InVals.push_back(Val);
3059 }
3060
3061 return Chain;
3062}
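
The 'this'-return fast path above corresponds to source patterns like the following; a hedged example (whether the copy is really elided depends on the front end attaching the 'returned' attribute to the call):

// A method that returns *this can keep the object pointer in x0 across the
// call, so chained calls such as w.set(1).set(2) need no extra register copy.
struct Widget {
  int v = 0;
  Widget &set(int x) {
    v = x;
    return *this;
  }
};

int chained() {
  Widget w;
  return w.set(1).set(2).v;
}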
3063
3064/// Return true if the calling convention is one that we can guarantee TCO for.
3065static bool canGuaranteeTCO(CallingConv::ID CC) {
3066 return CC == CallingConv::Fast;
3067}
3068
3069/// Return true if we might ever do TCO for calls with this calling convention.
3070static bool mayTailCallThisCC(CallingConv::ID CC) {
3071 switch (CC) {
3072 case CallingConv::C:
3073 case CallingConv::PreserveMost:
3074 case CallingConv::Swift:
3075 return true;
3076 default:
3077 return canGuaranteeTCO(CC);
3078 }
3079}
3080
3081bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3082 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3083 const SmallVectorImpl<ISD::OutputArg> &Outs,
3084 const SmallVectorImpl<SDValue> &OutVals,
3085 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3086 if (!mayTailCallThisCC(CalleeCC))
3087 return false;
3088
3089 MachineFunction &MF = DAG.getMachineFunction();
3090 const Function *CallerF = MF.getFunction();
3091 CallingConv::ID CallerCC = CallerF->getCallingConv();
3092 bool CCMatch = CallerCC == CalleeCC;
3093
3094 // Byval parameters hand the function a pointer directly into the stack area
3095 // we want to reuse during a tail call. Working around this *is* possible (see
3096 // X86) but less efficient and uglier in LowerCall.
3097 for (Function::const_arg_iterator i = CallerF->arg_begin(),
3098 e = CallerF->arg_end();
3099 i != e; ++i)
3100 if (i->hasByValAttr())
3101 return false;
3102
3103 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3104 return canGuaranteeTCO(CalleeCC) && CCMatch;
3105
3106 // Externally-defined functions with weak linkage should not be
3107 // tail-called on AArch64 when the OS does not support dynamic
3108 // pre-emption of symbols, as the AAELF spec requires normal calls
3109 // to undefined weak functions to be replaced with a NOP or jump to the
3110 // next instruction. The behaviour of branch instructions in this
3111 // situation (as used for tail calls) is implementation-defined, so we
3112 // cannot rely on the linker replacing the tail call with a return.
3113 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3114 const GlobalValue *GV = G->getGlobal();
3115 const Triple &TT = getTargetMachine().getTargetTriple();
3116 if (GV->hasExternalWeakLinkage() &&
3117 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3118 return false;
3119 }
3120
3121 // Now we search for cases where we can use a tail call without changing the
3122 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3123 // concept.
3124
3125 // I want anyone implementing a new calling convention to think long and hard
3126 // about this assert.
3127 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3128 "Unexpected variadic calling convention");
3129
3130 LLVMContext &C = *DAG.getContext();
3131 if (isVarArg && !Outs.empty()) {
3132 // At least two cases here: if caller is fastcc then we can't have any
3133 // memory arguments (we'd be expected to clean up the stack afterwards). If
3134 // caller is C then we could potentially use its argument area.
3135
3136 // FIXME: for now we take the most conservative of these in both cases:
3137 // disallow all variadic memory operands.
3138 SmallVector<CCValAssign, 16> ArgLocs;
3139 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3140
3141 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3142 for (const CCValAssign &ArgLoc : ArgLocs)
3143 if (!ArgLoc.isRegLoc())
3144 return false;
3145 }
3146
3147 // Check that the call results are passed in the same way.
3148 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3149 CCAssignFnForCall(CalleeCC, isVarArg),
3150 CCAssignFnForCall(CallerCC, isVarArg)))
3151 return false;
3152 // The callee has to preserve all registers the caller needs to preserve.
3153 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3154 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3155 if (!CCMatch) {
3156 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3157 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3158 return false;
3159 }
3160
3161 // Nothing more to check if the callee is taking no arguments
3162 if (Outs.empty())
3163 return true;
3164
3165 SmallVector<CCValAssign, 16> ArgLocs;
3166 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3167
3168 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3169
3170 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3171
3172 // If the stack arguments for this call do not fit into our own save area then
3173 // the call cannot be made tail.
3174 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3175 return false;
3176
3177 const MachineRegisterInfo &MRI = MF.getRegInfo();
3178 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3179 return false;
3180
3181 return true;
3182}
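
Two source-level cases that exercise the checks above; a hedged sketch assuming the plain C calling convention on an ELF target (the weak declaration uses a clang/GCC extension):

// Candidate for a sibling call: same calling convention, no byval arguments,
// and no stack arguments beyond the caller's own incoming area.
long callee(long x);
long caller(long x) {
  return callee(x + 1);  // may be emitted as "b callee"
}

// Blocked by the externally-weak rule above: on ELF a branch to an undefined
// weak function is implementation-defined, so a normal call is kept.
__attribute__((weak)) long weak_callee(long x);
long caller_weak(long x) {
  return weak_callee(x);  // stays "bl weak_callee" followed by "ret"
}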
3183
3184SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3185 SelectionDAG &DAG,
3186 MachineFrameInfo &MFI,
3187 int ClobberedFI) const {
3188 SmallVector<SDValue, 8> ArgChains;
3189 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3190 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3191
3192 // Include the original chain at the beginning of the list. When this is
3193 // used by target LowerCall hooks, this helps legalize find the
3194 // CALLSEQ_BEGIN node.
3195 ArgChains.push_back(Chain);
3196
3197 // Add a chain value for each stack argument load that overlaps the clobbered object.
3198 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3199 UE = DAG.getEntryNode().getNode()->use_end();
3200 U != UE; ++U)
3201 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3202 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3203 if (FI->getIndex() < 0) {
3204 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3205 int64_t InLastByte = InFirstByte;
3206 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3207
3208 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3209 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3210 ArgChains.push_back(SDValue(L, 1));
3211 }
3212
3213 // Build a tokenfactor for all the chains.
3214 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3215}
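
The clobber test above is a plain closed-interval overlap check; a minimal standalone sketch of the same predicate (plain C++, not the LLVM API):

#include <cassert>
#include <cstdint>

// Byte ranges [AFirst, ALast] and [BFirst, BLast] overlap iff either range
// starts inside the other; this mirrors the two-sided test used above.
static bool bytesOverlap(int64_t AFirst, int64_t ALast,
                         int64_t BFirst, int64_t BLast) {
  return (BFirst <= AFirst && AFirst <= BLast) ||
         (AFirst <= BFirst && BFirst <= ALast);
}

int main() {
  assert(bytesOverlap(0, 7, 4, 11));   // partial overlap
  assert(!bytesOverlap(0, 7, 8, 15));  // adjacent slots do not overlap
  return 0;
}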
3216
3217bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3218 bool TailCallOpt) const {
3219 return CallCC == CallingConv::Fast && TailCallOpt;
3220}
3221
3222/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3223/// and add input and output parameter nodes.
3224SDValue
3225AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3226 SmallVectorImpl<SDValue> &InVals) const {
3227 SelectionDAG &DAG = CLI.DAG;
3228 SDLoc &DL = CLI.DL;
3229 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3230 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3231 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3232 SDValue Chain = CLI.Chain;
3233 SDValue Callee = CLI.Callee;
3234 bool &IsTailCall = CLI.IsTailCall;
3235 CallingConv::ID CallConv = CLI.CallConv;
3236 bool IsVarArg = CLI.IsVarArg;
3237
3238 MachineFunction &MF = DAG.getMachineFunction();
3239 bool IsThisReturn = false;
3240
3241 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3242 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3243 bool IsSibCall = false;
3244
3245 if (IsTailCall) {
3246 // Check if it's really possible to do a tail call.
3247 IsTailCall = isEligibleForTailCallOptimization(
3248 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3249 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3250 report_fatal_error("failed to perform tail call elimination on a call "
3251 "site marked musttail");
3252
3253 // A sibling call is one where we're under the usual C ABI and not planning
3254 // to change that but can still do a tail call:
3255 if (!TailCallOpt && IsTailCall)
3256 IsSibCall = true;
3257
3258 if (IsTailCall)
3259 ++NumTailCalls;
3260 }
3261
3262 // Analyze operands of the call, assigning locations to each operand.
3263 SmallVector<CCValAssign, 16> ArgLocs;
3264 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3265 *DAG.getContext());
3266
3267 if (IsVarArg) {
3268 // Handle fixed and variable vector arguments differently.
3269 // Variable vector arguments always go into memory.
3270 unsigned NumArgs = Outs.size();
3271
3272 for (unsigned i = 0; i != NumArgs; ++i) {
3273 MVT ArgVT = Outs[i].VT;
3274 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3275 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3276 /*IsVarArg=*/ !Outs[i].IsFixed);
3277 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3278 assert(!Res && "Call operand has unhandled type");
3279 (void)Res;
3280 }
3281 } else {
3282 // At this point, Outs[].VT may already be promoted to i32. To correctly
3283 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3284 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3285 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3286 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3287 // LocVT.
3288 unsigned NumArgs = Outs.size();
3289 for (unsigned i = 0; i != NumArgs; ++i) {
3290 MVT ValVT = Outs[i].VT;
3291 // Get type of the original argument.
3292 EVT ActualVT = getValueType(DAG.getDataLayout(),
3293 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3294 /*AllowUnknown*/ true);
3295 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3296 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3297 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3298 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3299 ValVT = MVT::i8;
3300 else if (ActualMVT == MVT::i16)
3301 ValVT = MVT::i16;
3302
3303 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3304 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3305 assert(!Res && "Call operand has unhandled type");
3306 (void)Res;
3307 }
3308 }
3309
3310 // Get a count of how many bytes are to be pushed on the stack.
3311 unsigned NumBytes = CCInfo.getNextStackOffset();
3312
3313 if (IsSibCall) {
3314 // Since we're not changing the ABI to make this a tail call, the memory
3315 // operands are already available in the caller's incoming argument space.
3316 NumBytes = 0;
3317 }
3318
3319 // FPDiff is the byte offset of the call's argument area from the callee's.
3320 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3321 // by this amount for a tail call. In a sibling call it must be 0 because the
3322 // caller will deallocate the entire stack and the callee still expects its
3323 // arguments to begin at SP+0. Completely unused for non-tail calls.
3324 int FPDiff = 0;
3325
3326 if (IsTailCall && !IsSibCall) {
3327 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3328
3329 // Since callee will pop argument stack as a tail call, we must keep the
3330 // popped size 16-byte aligned.
3331 NumBytes = alignTo(NumBytes, 16);
3332
3333 // FPDiff will be negative if this tail call requires more space than we
3334 // would automatically have in our incoming argument space. Positive if we
3335 // can actually shrink the stack.
3336 FPDiff = NumReusableBytes - NumBytes;
3337
3338 // The stack pointer must be 16-byte aligned at all times it's used for a
3339 // memory operation, which in practice means at *all* times and in
3340 // particular across call boundaries. Therefore our own arguments started at
3341 // a 16-byte aligned SP and the delta applied for the tail call should
3342 // satisfy the same constraint.
3343 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3344 }
3345
3346 // Adjust the stack pointer for the new arguments...
3347 // These operations are automatically eliminated by the prolog/epilog pass
3348 if (!IsSibCall)
3349 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3350
3351 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3352 getPointerTy(DAG.getDataLayout()));
3353
3354 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3355 SmallVector<SDValue, 8> MemOpChains;
3356 auto PtrVT = getPointerTy(DAG.getDataLayout());
3357
3358 // Walk the register/memloc assignments, inserting copies/loads.
3359 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3360 ++i, ++realArgIdx) {
3361 CCValAssign &VA = ArgLocs[i];
3362 SDValue Arg = OutVals[realArgIdx];
3363 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3364
3365 // Promote the value if needed.
3366 switch (VA.getLocInfo()) {
3367 default:
3368 llvm_unreachable("Unknown loc info!");
3369 case CCValAssign::Full:
3370 break;
3371 case CCValAssign::SExt:
3372 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3373 break;
3374 case CCValAssign::ZExt:
3375 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3376 break;
3377 case CCValAssign::AExt:
3378 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3379 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3380 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3381 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3382 }
3383 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3384 break;
3385 case CCValAssign::BCvt:
3386 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3387 break;
3388 case CCValAssign::FPExt:
3389 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3390 break;
3391 }
3392
3393 if (VA.isRegLoc()) {
3394 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3395 Outs[0].VT == MVT::i64) {
3396 assert(VA.getLocVT() == MVT::i64 &&
3397 "unexpected calling convention register assignment");
3398 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3399 "unexpected use of 'returned'");
3400 IsThisReturn = true;
3401 }
3402 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3403 } else {
3404 assert(VA.isMemLoc());
3405
3406 SDValue DstAddr;
3407 MachinePointerInfo DstInfo;
3408
3409 // FIXME: This works on big-endian for composite byvals, which are the
3410 // common case. It should also work for fundamental types.
3411 uint32_t BEAlign = 0;
3412 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3413 : VA.getValVT().getSizeInBits();
3414 OpSize = (OpSize + 7) / 8;
3415 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3416 !Flags.isInConsecutiveRegs()) {
3417 if (OpSize < 8)
3418 BEAlign = 8 - OpSize;
3419 }
3420 unsigned LocMemOffset = VA.getLocMemOffset();
3421 int32_t Offset = LocMemOffset + BEAlign;
3422 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3423 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3424
3425 if (IsTailCall) {
3426 Offset = Offset + FPDiff;
3427 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3428
3429 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3430 DstInfo =
3431 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3432
3433 // Make sure any stack arguments overlapping with where we're storing
3434 // are loaded before this eventual operation. Otherwise they'll be
3435 // clobbered.
3436 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3437 } else {
3438 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3439
3440 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3441 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3442 LocMemOffset);
3443 }
3444
3445 if (Outs[i].Flags.isByVal()) {
3446 SDValue SizeNode =
3447 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3448 SDValue Cpy = DAG.getMemcpy(
3449 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3450 /*isVol = */ false, /*AlwaysInline = */ false,
3451 /*isTailCall = */ false,
3452 DstInfo, MachinePointerInfo());
3453
3454 MemOpChains.push_back(Cpy);
3455 } else {
3456 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3457 // promoted to a legal register type i32, we should truncate Arg back to
3458 // i1/i8/i16.
3459 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3460 VA.getValVT() == MVT::i16)
3461 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3462
3463 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3464 MemOpChains.push_back(Store);
3465 }
3466 }
3467 }
3468
3469 if (!MemOpChains.empty())
3470 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3471
3472 // Build a sequence of copy-to-reg nodes chained together with token chain
3473 // and flag operands which copy the outgoing args into the appropriate regs.
3474 SDValue InFlag;
3475 for (auto &RegToPass : RegsToPass) {
3476 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3477 RegToPass.second, InFlag);
3478 InFlag = Chain.getValue(1);
3479 }
3480
3481 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3482 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3483 // node so that legalize doesn't hack it.
3484 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3485 auto GV = G->getGlobal();
3486 if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3487 AArch64II::MO_GOT) {
3488 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3489 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3490 } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3491 assert(Subtarget->isTargetWindows() &&
3492 "Windows is the only supported COFF target");
3493 Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3494 } else {
3495 const GlobalValue *GV = G->getGlobal();
3496 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3497 }
3498 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3499 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3500 Subtarget->isTargetMachO()) {
3501 const char *Sym = S->getSymbol();
3502 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3503 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3504 } else {
3505 const char *Sym = S->getSymbol();
3506 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3507 }
3508 }
3509
3510 // We don't usually want to end the call-sequence here because we would tidy
3511 // the frame up *after* the call. However, in the ABI-changing tail-call case
3512 // we've carefully laid out the parameters so that when sp is reset they'll be
3513 // in the correct location.
3514 if (IsTailCall && !IsSibCall) {
3515 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3516 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3517 InFlag = Chain.getValue(1);
3518 }
3519
3520 std::vector<SDValue> Ops;
3521 Ops.push_back(Chain);
3522 Ops.push_back(Callee);
3523
3524 if (IsTailCall) {
3525 // Each tail call may have to adjust the stack by a different amount, so
3526 // this information must travel along with the operation for eventual
3527 // consumption by emitEpilogue.
3528 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3529 }
3530
3531 // Add argument registers to the end of the list so that they are known live
3532 // into the call.
3533 for (auto &RegToPass : RegsToPass)
3534 Ops.push_back(DAG.getRegister(RegToPass.first,
3535 RegToPass.second.getValueType()));
3536
3537 // Add a register mask operand representing the call-preserved registers.
3538 const uint32_t *Mask;
3539 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3540 if (IsThisReturn) {
3541 // For 'this' returns, use the X0-preserving mask if applicable
3542 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3543 if (!Mask) {
3544 IsThisReturn = false;
3545 Mask = TRI->getCallPreservedMask(MF, CallConv);
3546 }
3547 } else
3548 Mask = TRI->getCallPreservedMask(MF, CallConv);
3549
3550 assert(Mask && "Missing call preserved mask for calling convention");
3551 Ops.push_back(DAG.getRegisterMask(Mask));
3552
3553 if (InFlag.getNode())
3554 Ops.push_back(InFlag);
3555
3556 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3557
3558 // If we're doing a tail call, use a TC_RETURN here rather than an
3559 // actual call instruction.
3560 if (IsTailCall) {
3561 MF.getFrameInfo().setHasTailCall();
3562 return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3563 }
3564
3565 // Returns a chain and a flag for retval copy to use.
3566 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3567 InFlag = Chain.getValue(1);
3568
3569 uint64_t CalleePopBytes =
3570 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3571
3572 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3573 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3574 InFlag, DL);
3575 if (!Ins.empty())
3576 InFlag = Chain.getValue(1);
3577
3578 // Handle result values, copying them out of physregs into vregs that we
3579 // return.
3580 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3581 InVals, IsThisReturn,
3582 IsThisReturn ? OutVals[0] : SDValue());
3583}
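
A numeric sketch of the FPDiff bookkeeping above (plain C++, not the LLVM API), assuming the 16-byte stack alignment the code enforces:

#include <cstdio>

// Round a byte count up to the next multiple of 16, as alignTo(N, 16) does for
// a power-of-two alignment.
static unsigned alignTo16(unsigned N) { return (N + 15u) & ~15u; }

int main() {
  unsigned NumReusableBytes = 16;     // caller's own incoming stack-arg area
  unsigned NumBytes = alignTo16(40);  // callee needs 40 bytes -> 48 after padding
  // Negative: this tail call needs 32 more bytes than the incoming area holds.
  int FPDiff = (int)NumReusableBytes - (int)NumBytes;
  std::printf("NumBytes=%u FPDiff=%d\n", NumBytes, FPDiff);
  return 0;
}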
3584
3585bool AArch64TargetLowering::CanLowerReturn(
3586 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3587 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3588 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3589 ? RetCC_AArch64_WebKit_JS
3590 : RetCC_AArch64_AAPCS;
3591 SmallVector<CCValAssign, 16> RVLocs;
3592 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3593 return CCInfo.CheckReturn(Outs, RetCC);
3594}
3595
3596SDValue
3597AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3598 bool isVarArg,
3599 const SmallVectorImpl<ISD::OutputArg> &Outs,
3600 const SmallVectorImpl<SDValue> &OutVals,
3601 const SDLoc &DL, SelectionDAG &DAG) const {
3602 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3603 ? RetCC_AArch64_WebKit_JS
3604 : RetCC_AArch64_AAPCS;
3605 SmallVector<CCValAssign, 16> RVLocs;
3606 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3607 *DAG.getContext());
3608 CCInfo.AnalyzeReturn(Outs, RetCC);
3609
3610 // Copy the result values into the output registers.
3611 SDValue Flag;
3612 SmallVector<SDValue, 4> RetOps(1, Chain);
3613 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3614 ++i, ++realRVLocIdx) {
3615 CCValAssign &VA = RVLocs[i];
3616 assert(VA.isRegLoc() && "Can only return in registers!");
3617 SDValue Arg = OutVals[realRVLocIdx];
3618
3619 switch (VA.getLocInfo()) {
3620 default:
3621 llvm_unreachable("Unknown loc info!");
3622 case CCValAssign::Full:
3623 if (Outs[i].ArgVT == MVT::i1) {
3624 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3625 // value. This is strictly redundant on Darwin (which uses "zeroext
3626 // i1"), but will be optimised out before ISel.
3627 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3628 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3629 }
3630 break;
3631 case CCValAssign::BCvt:
3632 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3633 break;
3634 }
3635
3636 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3637 Flag = Chain.getValue(1);
3638 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3639 }
3640 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3641 const MCPhysReg *I =
3642 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3643 if (I) {
3644 for (; *I; ++I) {
3645 if (AArch64::GPR64RegClass.contains(*I))
3646 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3647 else if (AArch64::FPR64RegClass.contains(*I))
3648 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3649 else
3650 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3651 }
3652 }
3653
3654 RetOps[0] = Chain; // Update chain.
3655
3656 // Add the flag if we have it.
3657 if (Flag.getNode())
3658 RetOps.push_back(Flag);
3659
3660 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3661}
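
The i1 rule applied above is what returning a bool looks like at the source level; a hedged sketch (exact codegen varies with optimisation level and toolchain):

// AAPCS expects the producer of an i1 to hand back a zero-extended value, so
// the caller can consume w0 directly without masking.
bool isOdd(unsigned x) {
  return (x & 1u) != 0;  // typically "and w0, w0, #1" before the ret
}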
3662
3663//===----------------------------------------------------------------------===//
3664// Other Lowering Code
3665//===----------------------------------------------------------------------===//
3666
3667SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3668 SelectionDAG &DAG,
3669 unsigned Flag) const {
3670 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
3671}
3672
3673SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3674 SelectionDAG &DAG,
3675 unsigned Flag) const {
3676 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3677}
3678
3679SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3680 SelectionDAG &DAG,
3681 unsigned Flag) const {
3682 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3683 N->getOffset(), Flag);
3684}
3685
3686SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
3687 SelectionDAG &DAG,
3688 unsigned Flag) const {
3689 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
3690}
3691
3692// (loadGOT sym)
3693template <class NodeTy>
3694SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
3695 unsigned Flags) const {
3696 DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
3697 SDLoc DL(N);
3698 EVT Ty = getPointerTy(DAG.getDataLayout());
3699 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
3700 // FIXME: Once remat is capable of dealing with instructions with register
3701 // operands, expand this into two nodes instead of using a wrapper node.
3702 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
3703}
3704
3705// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
3706template <class NodeTy>
3707SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
3708 unsigned Flags) const {
3709 DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
3710 SDLoc DL(N);
3711 EVT Ty = getPointerTy(DAG.getDataLayout());
3712 const unsigned char MO_NC = AArch64II::MO_NC;
3713 return DAG.getNode(
3714 AArch64ISD::WrapperLarge, DL, Ty,
3715 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
3716 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
3717 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
3718 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
3719}
3720
3721// (addlow (adrp %hi(sym)) %lo(sym))
3722template <class NodeTy>
3723SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3724 unsigned Flags) const {
3725 DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
3726 SDLoc DL(N);
3727 EVT Ty = getPointerTy(DAG.getDataLayout());
3728 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
3729 SDValue Lo = getTargetNode(N, Ty, DAG,
3730 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
3731 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
3732 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
3733}
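
What the ADRP/ADDlow pair built above computes, seen from the source side; a hedged sketch for the small code model (register choice is up to the allocator):

extern int g_counter;

// Expected materialisation of the address in the small code model:
//   adrp x0, g_counter             // 4 KiB page containing the symbol
//   add  x0, x0, :lo12:g_counter   // low 12 bits within that page
int *addressOfCounter() {
  return &g_counter;
}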
3734
3735SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
3736 SelectionDAG &DAG) const {
3737 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
3738 const GlobalValue *GV = GN->getGlobal();
3739 const AArch64II::TOF TargetFlags =
3740 (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
3741 : AArch64II::MO_NO_FLAG);
3742 unsigned char OpFlags =
3743 Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
3744
3745 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
3746 "unexpected offset in global node");
3747
3748 // This also catches the large code model case for Darwin.
3749 if ((OpFlags & AArch64II::MO_GOT) != 0) {
3750 return getGOT(GN, DAG, TargetFlags);
3751 }
3752
3753 SDValue Result;
3754 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3755 Result = getAddrLarge(GN, DAG, TargetFlags);
3756 } else {
3757 Result = getAddr(GN, DAG, TargetFlags);
3758 }
3759 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3760 SDLoc DL(GN);
3761 if (GV->hasDLLImportStorageClass())
3762 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3763 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3764 return Result;
3765}
3766
3767/// \brief Convert a TLS address reference into the correct sequence of loads
3768/// and calls to compute the variable's address (for Darwin, currently) and
3769/// return an SDValue containing the final node.
3770
3771/// Darwin only has one TLS scheme which must be capable of dealing with the
3772/// fully general situation, in the worst case. This means:
3773/// + "extern __thread" declaration.
3774/// + Defined in a possibly unknown dynamic library.
3775///
3776/// The general system is that each __thread variable has a [3 x i64] descriptor
3777/// which contains information used by the runtime to calculate the address. The
3778/// only part of this the compiler needs to know about is the first xword, which
3779/// contains a function pointer that must be called with the address of the
3780/// entire descriptor in "x0".
3781///
3782/// Since this descriptor may be in a different unit, in general even the
3783/// descriptor must be accessed via an indirect load. The "ideal" code sequence
3784/// is:
3785/// adrp x0, _var@TLVPPAGE
3786/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
3787/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
3788/// ; the function pointer
3789/// blr x1 ; Uses descriptor address in x0
3790/// ; Address of _var is now in x0.
3791///
3792/// If the address of _var's descriptor *is* known to the linker, then it can
3793/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
3794/// a slight efficiency gain.
3795SDValue
3796AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
3797 SelectionDAG &DAG) const {
3798 assert(Subtarget->isTargetDarwin() &&
3799 "This function expects a Darwin target");
3800
3801 SDLoc DL(Op);
3802 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3803 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3804
3805 SDValue TLVPAddr =
3806 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3807 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
3808
3809 // The first entry in the descriptor is a function pointer that we must call
3810 // to obtain the address of the variable.
3811 SDValue Chain = DAG.getEntryNode();
3812 SDValue FuncTLVGet = DAG.getLoad(
3813 MVT::i64, DL, Chain, DescAddr,
3814 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3815 /* Alignment = */ 8,
3816 MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
3817 MachineMemOperand::MODereferenceable);
3818 Chain = FuncTLVGet.getValue(1);
3819
3820 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
3821 MFI.setAdjustsStack(true);
3822
3823 // TLS calls preserve all registers except those that absolutely must be
3824 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3825 // silly).
3826 const uint32_t *Mask =
3827 Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
3828
3829 // Finally, we can make the call. This is just a degenerate version of a
3830 // normal AArch64 call node: x0 takes the address of the descriptor, and
3831 // returns the address of the variable in this thread.
3832 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
3833 Chain =
3834 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3835 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
3836 DAG.getRegisterMask(Mask), Chain.getValue(1));
3837 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
3838}
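
As a hedged illustration (not part of this file), the fully general Darwin case described in the comment above arises from source along these lines; the names are invented for the example, and the variable's definition is assumed to live in some other dynamic library:

  extern __thread int tls_counter;  // assumed to be defined in another dylib
  int read_tls_counter(void) { return tls_counter; }

On Darwin targets such an access reaches LowerDarwinGlobalTLSAddress above and is lowered into the adrp/ldr/ldr/blr sequence sketched in the preceding comment.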
3839
3840/// When accessing thread-local variables under either the general-dynamic or
3841/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
3842/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
3843/// is a function pointer to carry out the resolution.
3844///
3845/// The sequence is:
3846/// adrp x0, :tlsdesc:var
3847/// ldr x1, [x0, #:tlsdesc_lo12:var]
3848/// add x0, x0, #:tlsdesc_lo12:var
3849/// .tlsdesccall var
3850/// blr x1
3851/// (TPIDR_EL0 offset now in x0)
3852///
3853/// The above sequence must be produced unscheduled, to enable the linker to
3854/// optimize/relax this sequence.
3855/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
3856/// above sequence, and expanded really late in the compilation flow, to ensure
3857/// the sequence is produced as per above.
3858SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
3859 const SDLoc &DL,
3860 SelectionDAG &DAG) const {
3861 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3862
3863 SDValue Chain = DAG.getEntryNode();
3864 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3865
3866 Chain =
3867 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
3868 SDValue Glue = Chain.getValue(1);
3869
3870 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
3871}
3872
3873SDValue
3874AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
3875 SelectionDAG &DAG) const {
3876 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
3877 assert(Subtarget->useSmallAddressing() &&
3878 "ELF TLS only supported in small memory model");
3879 // Different choices can be made for the maximum size of the TLS area for a
3880 // module. For the small address model, the default TLS size is 16MiB and the
3881 // maximum TLS size is 4GiB.
3882 // FIXME: add -mtls-size command line option and make it control the 16MiB
3883 // vs. 4GiB code sequence generation.
3884 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3885
3886 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
3887
3888 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
3889 if (Model == TLSModel::LocalDynamic)
3890 Model = TLSModel::GeneralDynamic;
3891 }
3892
3893 SDValue TPOff;
3894 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3895 SDLoc DL(Op);
3896 const GlobalValue *GV = GA->getGlobal();
3897
3898 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
3899
3900 if (Model == TLSModel::LocalExec) {
3901 SDValue HiVar = DAG.getTargetGlobalAddress(
3902 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3903 SDValue LoVar = DAG.getTargetGlobalAddress(
3904 GV, DL, PtrVT, 0,
3905 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3906
3907 SDValue TPWithOff_lo =
3908 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
3909 HiVar,
3910 DAG.getTargetConstant(0, DL, MVT::i32)),
3911 0);
3912 SDValue TPWithOff =
3913 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
3914 LoVar,
3915 DAG.getTargetConstant(0, DL, MVT::i32)),
3916 0);
3917 return TPWithOff;
3918 } else if (Model == TLSModel::InitialExec) {
3919 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3920 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
3921 } else if (Model == TLSModel::LocalDynamic) {
3922 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
3923 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
3924 // the beginning of the module's TLS region, followed by a DTPREL offset
3925 // calculation.
3926
3927 // These accesses will need deduplicating if there's more than one.
3928 AArch64FunctionInfo *MFI =
3929 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
3930 MFI->incNumLocalDynamicTLSAccesses();
3931
3932 // The call needs a relocation too for linker relaxation. It doesn't make
3933 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3934 // the address.
3935 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
3936 AArch64II::MO_TLS);
3937
3938 // Now we can calculate the offset from TPIDR_EL0 to this module's
3939 // thread-local area.
3940 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3941
3942 // Now use :dtprel_whatever: operations to calculate this variable's offset
3943 // in its thread-storage area.
3944 SDValue HiVar = DAG.getTargetGlobalAddress(
3945 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3946 SDValue LoVar = DAG.getTargetGlobalAddress(
3947 GV, DL, MVT::i64, 0,
3948 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3949
3950 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
3951 DAG.getTargetConstant(0, DL, MVT::i32)),
3952 0);
3953 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
3954 DAG.getTargetConstant(0, DL, MVT::i32)),
3955 0);
3956 } else if (Model == TLSModel::GeneralDynamic) {
3957 // The call needs a relocation too for linker relaxation. It doesn't make
3958 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3959 // the address.
3960 SDValue SymAddr =
3961 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3962
3963 // Finally we can make a call to calculate the offset from tpidr_el0.
3964 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3965 } else
3966 llvm_unreachable("Unsupported ELF TLS access model")::llvm::llvm_unreachable_internal("Unsupported ELF TLS access model"
, "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3966)
;
3967
3968 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
3969}
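
A hedged illustration of how the ELF TLS models handled above typically arise at the source level; the model actually chosen also depends on -fPIC, symbol visibility and the target machine's TLS-model query, so treat this mapping as indicative only:

  __thread int t_local;          // in an executable: usually local-exec
  extern __thread int t_extern;  // defined elsewhere: often initial-exec
  int tls_sum(void) { return t_local + t_extern; }
  // When built with -fPIC for a shared object, accesses generally take the
  // general-dynamic path, i.e. the TLSDESC_CALLSEQ lowering above.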
3970
3971SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
3972 SelectionDAG &DAG) const {
3973 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3974 if (DAG.getTarget().Options.EmulatedTLS)
3975 return LowerToTLSEmulatedModel(GA, DAG);
3976
3977 if (Subtarget->isTargetDarwin())
3978 return LowerDarwinGlobalTLSAddress(Op, DAG);
3979 if (Subtarget->isTargetELF())
3980 return LowerELFGlobalTLSAddress(Op, DAG);
3981
3982 llvm_unreachable("Unexpected platform trying to use TLS")::llvm::llvm_unreachable_internal("Unexpected platform trying to use TLS"
, "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3982)
;
3983}
3984
3985SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3986 SDValue Chain = Op.getOperand(0);
3987 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3988 SDValue LHS = Op.getOperand(2);
3989 SDValue RHS = Op.getOperand(3);
3990 SDValue Dest = Op.getOperand(4);
3991 SDLoc dl(Op);
3992
3993 // Handle f128 first, since lowering it will result in comparing the return
3994 // value of a libcall against zero, which is just what the rest of LowerBR_CC
3995 // is expecting to deal with.
3996 if (LHS.getValueType() == MVT::f128) {
3997 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
3998
3999 // If softenSetCCOperands returned a scalar, we need to compare the result
4000 // against zero to select between true and false values.
4001 if (!RHS.getNode()) {
4002 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4003 CC = ISD::SETNE;
4004 }
4005 }
4006
4007 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4008 // instruction.
4009 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4010 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4011 // Only lower legal XALUO ops.
4012 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4013 return SDValue();
4014
4015 // The actual operation with overflow check.
4016 AArch64CC::CondCode OFCC;
4017 SDValue Value, Overflow;
4018 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4019
4020 if (CC == ISD::SETNE)
4021 OFCC = getInvertedCondCode(OFCC);
4022 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4023
4024 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4025 Overflow);
4026 }
4027
4028 if (LHS.getValueType().isInteger()) {
4029 assert((LHS.getValueType() == RHS.getValueType()) &&
4030 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4031
4032 // If the RHS of the comparison is zero, we can potentially fold this
4033 // to a specialized branch.
4034 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4035 if (RHSC && RHSC->getZExtValue() == 0) {
4036 if (CC == ISD::SETEQ) {
4037 // See if we can use a TBZ to fold in an AND as well.
4038 // TBZ has a smaller branch displacement than CBZ. If the offset is
4039 // out of bounds, a late MI-layer pass rewrites branches.
4040 // 403.gcc is an example that hits this case.
4041 if (LHS.getOpcode() == ISD::AND &&
4042 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4043 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4044 SDValue Test = LHS.getOperand(0);
4045 uint64_t Mask = LHS.getConstantOperandVal(1);
4046 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4047 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4048 Dest);
4049 }
4050
4051 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4052 } else if (CC == ISD::SETNE) {
4053 // See if we can use a TBZ to fold in an AND as well.
4054 // TBZ has a smaller branch displacement than CBZ. If the offset is
4055 // out of bounds, a late MI-layer pass rewrites branches.
4056 // 403.gcc is an example that hits this case.
4057 if (LHS.getOpcode() == ISD::AND &&
4058 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4059 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4060 SDValue Test = LHS.getOperand(0);
4061 uint64_t Mask = LHS.getConstantOperandVal(1);
4062 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4063 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4064 Dest);
4065 }
4066
4067 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4068 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4069 // Don't combine AND since emitComparison converts the AND to an ANDS
4070 // (a.k.a. TST) and the test in the test bit and branch instruction
4071 // becomes redundant. This would also increase register pressure.
4072 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4073 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4074 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4075 }
4076 }
4077 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4078 LHS.getOpcode() != ISD::AND) {
4079 // Don't combine AND since emitComparison converts the AND to an ANDS
4080 // (a.k.a. TST) and the test in the test bit and branch instruction
4081 // becomes redundant. This would also increase register pressure.
4082 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4083 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4084 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4085 }
4086
4087 SDValue CCVal;
4088 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4089 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4090 Cmp);
4091 }
4092
4093 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4094 LHS.getValueType() == MVT::f64);
4095
4096 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4097 // clean. Some of them require two branches to implement.
4098 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4099 AArch64CC::CondCode CC1, CC2;
4100 changeFPCCToAArch64CC(CC, CC1, CC2);
4101 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4102 SDValue BR1 =
4103 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4104 if (CC2 != AArch64CC::AL) {
4105 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4106 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4107 Cmp);
4108 }
4109
4110 return BR1;
4111}
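
A hedged illustration of source that produces the zero-comparison branches folded above; do_low_bit_clear and do_zero are hypothetical helpers, and the named instructions are only the candidates this code tries to form:

  extern void do_low_bit_clear(void);
  extern void do_zero(void);
  void branch_examples(unsigned long x) {
    if ((x & 8) == 0)  // AND with a power-of-two mask vs. zero: TBZ/TBNZ candidate
      do_low_bit_clear();
    if (x == 0)        // plain comparison against zero: CBZ/CBNZ candidate
      do_zero();
  }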
4112
4113SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4114 SelectionDAG &DAG) const {
4115 EVT VT = Op.getValueType();
4116 SDLoc DL(Op);
4117
4118 SDValue In1 = Op.getOperand(0);
4119 SDValue In2 = Op.getOperand(1);
4120 EVT SrcVT = In2.getValueType();
4121
4122 if (SrcVT.bitsLT(VT))
4123 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4124 else if (SrcVT.bitsGT(VT))
4125 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4126
4127 EVT VecVT;
4128 uint64_t EltMask;
4129 SDValue VecVal1, VecVal2;
4130
4131 auto setVecVal = [&] (int Idx) {
4132 if (!VT.isVector()) {
4133 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4134 DAG.getUNDEF(VecVT), In1);
4135 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4136 DAG.getUNDEF(VecVT), In2);
4137 } else {
4138 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4139 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4140 }
4141 };
4142
4143 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4144 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4145 EltMask = 0x80000000ULL;
4146 setVecVal(AArch64::ssub);
4147 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4148 VecVT = MVT::v2i64;
4149
4150 // We want to materialize a mask with the high bit set, but the AdvSIMD
4151 // immediate moves cannot materialize that in a single instruction for
4152 // 64-bit elements. Instead, materialize zero and then negate it.
4153 EltMask = 0;
4154
4155 setVecVal(AArch64::dsub);
4156 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4157 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4158 EltMask = 0x8000ULL;
4159 setVecVal(AArch64::hsub);
4160 } else {
4161 llvm_unreachable("Invalid type for copysign!")::llvm::llvm_unreachable_internal("Invalid type for copysign!"
, "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4161)
;
4162 }
4163
4164 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4165
4166 // If we couldn't materialize the mask above, then the mask vector will be
4167 // the zero vector, and we need to negate it here.
4168 if (VT == MVT::f64 || VT == MVT::v2f64) {
4169 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4170 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4171 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4172 }
4173
4174 SDValue Sel =
4175 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4176
4177 if (VT == MVT::f16)
4178 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4179 if (VT == MVT::f32)
4180 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4181 else if (VT == MVT::f64)
4182 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4183 else
4184 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4185}
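
A hedged illustration: copysign builtins and library calls become ISD::FCOPYSIGN nodes, which the lowering above implements as a BIT (bitwise insert) under a sign-bit mask (0x8000/0x80000000, or a negated zero vector for the f64 case):

  #include <cmath>
  double copy_sign_d(double mag, double sgn) { return std::copysign(mag, sgn); }
  float  copy_sign_f(float mag, float sgn)   { return std::copysign(mag, sgn); }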
4186
4187SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4188 if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
4189 Attribute::NoImplicitFloat))
4190 return SDValue();
4191
4192 if (!Subtarget->hasNEON())
4193 return SDValue();
4194
4195 // Although there is no integer popcount instruction, CTPOP can be lowered
4196 // more efficiently to the following sequence, which uses AdvSIMD
4197 // registers/instructions, as long as the copies to/from the AdvSIMD
4198 // registers are cheap.
4199 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4200 // CNT V0.8B, V0.8B // 8xbyte pop-counts
4201 // ADDV B0, V0.8B // sum 8xbyte pop-counts
4202 // UMOV X0, V0.B[0] // copy byte result back to integer reg
4203 SDValue Val = Op.getOperand(0);
4204 SDLoc DL(Op);
4205 EVT VT = Op.getValueType();
4206
4207 if (VT == MVT::i32)
4208 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4209 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4210
4211 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4212 SDValue UaddLV = DAG.getNode(
4213 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4214 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4215
4216 if (VT == MVT::i64)
4217 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4218 return UaddLV;
4219}
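
A hedged illustration: a popcount builtin becomes llvm.ctpop, which the code above lowers to the FMOV/CNT/ADDV/UMOV sequence when NEON is available and NoImplicitFloat is not set:

  int popcount64(unsigned long long x) { return __builtin_popcountll(x); }
  int popcount32(unsigned int x)       { return __builtin_popcount(x); }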
4220
4221SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4222
4223 if (Op.getValueType().isVector())
4224 return LowerVSETCC(Op, DAG);
4225
4226 SDValue LHS = Op.getOperand(0);
4227 SDValue RHS = Op.getOperand(1);
4228 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4229 SDLoc dl(Op);
4230
4231 // We chose ZeroOrOneBooleanContents, so use zero and one.
4232 EVT VT = Op.getValueType();
4233 SDValue TVal = DAG.getConstant(1, dl, VT);
4234 SDValue FVal = DAG.getConstant(0, dl, VT);
4235
4236 // Handle f128 first, since one possible outcome is a normal integer
4237 // comparison which gets picked up by the next if statement.
4238 if (LHS.getValueType() == MVT::f128) {
4239 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4240
4241 // If softenSetCCOperands returned a scalar, use it.
4242 if (!RHS.getNode()) {
4243 assert(LHS.getValueType() == Op.getValueType() &&
4244 "Unexpected setcc expansion!");
4245 return LHS;
4246 }
4247 }
4248
4249 if (LHS.getValueType().isInteger()) {
4250 SDValue CCVal;
4251 SDValue Cmp =
4252 getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4253
4254 // Note that we inverted the condition above, so we reverse the order of
4255 // the true and false operands here. This will allow the setcc to be
4256 // matched to a single CSINC instruction.
4257 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4258 }
4259
4260 // Now we know we're dealing with FP values.
4261 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4262 LHS.getValueType() == MVT::f64);
4263
4264 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4265 // and do the comparison.
4266 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4267
4268 AArch64CC::CondCode CC1, CC2;
4269 changeFPCCToAArch64CC(CC, CC1, CC2);
4270 if (CC2 == AArch64CC::AL) {
4271 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4272 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4273
4274 // Note that we inverted the condition above, so we reverse the order of
4275 // the true and false operands here. This will allow the setcc to be
4276 // matched to a single CSINC instruction.
4277 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4278 } else {
4279 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4280 // totally clean. Some of them require two CSELs to implement. As is in
4281 // this case, we emit the first CSEL and then emit a second using the output
4282 // of the first as the RHS. We're effectively OR'ing the two CC's together.
4283
4284 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4285 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4286 SDValue CS1 =
4287 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4288
4289 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4290 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4291 }
4292}
4293
4294SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4295 SDValue RHS, SDValue TVal,
4296 SDValue FVal, const SDLoc &dl,
4297 SelectionDAG &DAG) const {
4298 // Handle f128 first, because it will result in a comparison of some RTLIB
4299 // call result against zero.
4300 if (LHS.getValueType() == MVT::f128) {
4301 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4302
4303 // If softenSetCCOperands returned a scalar, we need to compare the result
4304 // against zero to select between true and false values.
4305 if (!RHS.getNode()) {
4306 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4307 CC = ISD::SETNE;
4308 }
4309 }
4310
4311 // Also handle f16, for which we need to do a f32 comparison.
4312 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4313 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4314 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4315 }
4316
4317 // Next, handle integers.
4318 if (LHS.getValueType().isInteger()) {
4319 assert((LHS.getValueType() == RHS.getValueType()) &&
4320 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4321
4322 unsigned Opcode = AArch64ISD::CSEL;
4323
4324 // If both the TVal and the FVal are constants, see if we can swap them in
4325 // order to form a CSINV or CSINC out of them.
4326 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4327 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4328
4329 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4330 std::swap(TVal, FVal);
4331 std::swap(CTVal, CFVal);
4332 CC = ISD::getSetCCInverse(CC, true);
4333 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4334 std::swap(TVal, FVal);
4335 std::swap(CTVal, CFVal);
4336 CC = ISD::getSetCCInverse(CC, true);
4337 } else if (TVal.getOpcode() == ISD::XOR) {
4338 // If TVal is a NOT we want to swap TVal and FVal so that we can match
4339 // with a CSINV rather than a CSEL.
4340 if (isAllOnesConstant(TVal.getOperand(1))) {
4341 std::swap(TVal, FVal);
4342 std::swap(CTVal, CFVal);
4343 CC = ISD::getSetCCInverse(CC, true);
4344 }
4345 } else if (TVal.getOpcode() == ISD::SUB) {
4346 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4347 // that we can match with a CSNEG rather than a CSEL.
4348 if (isNullConstant(TVal.getOperand(0))) {
4349 std::swap(TVal, FVal);
4350 std::swap(CTVal, CFVal);
4351 CC = ISD::getSetCCInverse(CC, true);
4352 }
4353 } else if (CTVal && CFVal) {
4354 const int64_t TrueVal = CTVal->getSExtValue();
4355 const int64_t FalseVal = CFVal->getSExtValue();
4356 bool Swap = false;
4357
4358 // If both TVal and FVal are constants, see if FVal is the
4359 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4360 // instead of a CSEL in that case.
4361 if (TrueVal == ~FalseVal) {
4362 Opcode = AArch64ISD::CSINV;
4363 } else if (TrueVal == -FalseVal) {
4364 Opcode = AArch64ISD::CSNEG;
4365 } else if (TVal.getValueType() == MVT::i32) {
4366 // If our operands are only 32-bit wide, make sure we use 32-bit
4367 // arithmetic for the check whether we can use CSINC. This ensures that
4368 // the addition in the check will wrap around properly in case there is
4369 // an overflow (which would not be the case if we do the check with
4370 // 64-bit arithmetic).
4371 const uint32_t TrueVal32 = CTVal->getZExtValue();
4372 const uint32_t FalseVal32 = CFVal->getZExtValue();
4373
4374 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4375 Opcode = AArch64ISD::CSINC;
4376
4377 if (TrueVal32 > FalseVal32) {
4378 Swap = true;
4379 }
4380 }
4381 // 64-bit check whether we can use CSINC.
4382 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4383 Opcode = AArch64ISD::CSINC;
4384
4385 if (TrueVal > FalseVal) {
4386 Swap = true;
4387 }
4388 }
4389
4390 // Swap TVal and FVal if necessary.
4391 if (Swap) {
4392 std::swap(TVal, FVal);
4393 std::swap(CTVal, CFVal);
4394 CC = ISD::getSetCCInverse(CC, true);
4395 }
4396
4397 if (Opcode != AArch64ISD::CSEL) {
4398 // Drop FVal since we can get its value by simply inverting/negating
4399 // TVal.
4400 FVal = TVal;
4401 }
4402 }
4403
4404 // Avoid materializing a constant when possible by reusing a known value in
4405 // a register. However, don't perform this optimization if the known value
4406 // is one, zero or negative one in the case of a CSEL. We can always
4407 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4408 // FVal, respectively.
4409 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4410 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4411 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4412 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4413 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4414 // "a != C ? x : a" to avoid materializing C.
4415 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4416 TVal = LHS;
4417 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4418 FVal = LHS;
4419 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4420 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
4421 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4422 // avoid materializing C.
4423 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4424 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4425 Opcode = AArch64ISD::CSINV;
4426 TVal = LHS;
4427 FVal = DAG.getConstant(0, dl, FVal.getValueType());
4428 }
4429 }
4430
4431 SDValue CCVal;
4432 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4433 EVT VT = TVal.getValueType();
4434 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4435 }
4436
4437 // Now we know we're dealing with FP values.
4438 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4439 LHS.getValueType() == MVT::f64);
4440 assert(LHS.getValueType() == RHS.getValueType());
4441 EVT VT = TVal.getValueType();
4442 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4443
4444 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4445 // clean. Some of them require two CSELs to implement.
4446 AArch64CC::CondCode CC1, CC2;
4447 changeFPCCToAArch64CC(CC, CC1, CC2);
4448
4449 if (DAG.getTarget().Options.UnsafeFPMath) {
4450 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4451 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4452 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4453 if (RHSVal && RHSVal->isZero()) {
4454 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4455 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4456
4457 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4458 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4459 TVal = LHS;
4460 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4461 CFVal && CFVal->isZero() &&
4462 FVal.getValueType() == LHS.getValueType())
4463 FVal = LHS;
4464 }
4465 }
4466
4467 // Emit first, and possibly only, CSEL.
4468 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4469 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4470
4471 // If we need a second CSEL, emit it, using the output of the first as the
4472 // RHS. We're effectively OR'ing the two CC's together.
4473 if (CC2 != AArch64CC::AL) {
4474 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4475 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4476 }
4477
4478 // Otherwise, return the output of the first CSEL.
4479 return CS1;
4480}
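
A hedged illustration of selects whose constant arms the checks above recognize; whether the final instruction really is CSINC/CSINV/CSNEG still depends on the rest of selection, so these are only the candidate shapes:

  long pick_inc(long a, long b) { return a < b ? 1 : 2; }  // TrueVal + 1 == FalseVal -> CSINC
  long pick_inv(long a, long b) { return a < b ? 5 : ~5; } // TrueVal == ~FalseVal    -> CSINV
  long pick_neg(long a, long b) { return a < b ? 7 : -7; } // TrueVal == -FalseVal    -> CSNEG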
4481
4482SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4483 SelectionDAG &DAG) const {
4484 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4485 SDValue LHS = Op.getOperand(0);
4486 SDValue RHS = Op.getOperand(1);
4487 SDValue TVal = Op.getOperand(2);
4488 SDValue FVal = Op.getOperand(3);
4489 SDLoc DL(Op);
4490 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4491}
4492
4493SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4494 SelectionDAG &DAG) const {
4495 SDValue CCVal = Op->getOperand(0);
4496 SDValue TVal = Op->getOperand(1);
4497 SDValue FVal = Op->getOperand(2);
4498 SDLoc DL(Op);
4499
4500 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4501 // instruction.
4502 if (isOverflowIntrOpRes(CCVal)) {
4503 // Only lower legal XALUO ops.
4504 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4505 return SDValue();
4506
4507 AArch64CC::CondCode OFCC;
4508 SDValue Value, Overflow;
4509 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4510 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4511
4512 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4513 CCVal, Overflow);
4514 }
4515
4516 // Lower it the same way as we would lower a SELECT_CC node.
4517 ISD::CondCode CC;
4518 SDValue LHS, RHS;
4519 if (CCVal.getOpcode() == ISD::SETCC) {
4520 LHS = CCVal.getOperand(0);
4521 RHS = CCVal.getOperand(1);
4522 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4523 } else {
4524 LHS = CCVal;
4525 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4526 CC = ISD::SETNE;
4527 }
4528 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4529}
4530
4531SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4532 SelectionDAG &DAG) const {
4533 // Jump table entries are emitted as PC-relative offsets. No additional
4534 // tweaking is necessary here. Just get the address of the jump table.
4535 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4536
4537 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4538 !Subtarget->isTargetMachO()) {
4539 return getAddrLarge(JT, DAG);
4540 }
4541 return getAddr(JT, DAG);
4542}
4543
4544SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4545 SelectionDAG &DAG) const {
4546 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4547
4548 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4549 // Use the GOT for the large code model on iOS.
4550 if (Subtarget->isTargetMachO()) {
4551 return getGOT(CP, DAG);
4552 }
4553 return getAddrLarge(CP, DAG);
4554 } else {
4555 return getAddr(CP, DAG);
4556 }
4557}
4558
4559SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4560 SelectionDAG &DAG) const {
4561 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
4562 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4563 !Subtarget->isTargetMachO()) {
4564 return getAddrLarge(BA, DAG);
4565 } else {
4566 return getAddr(BA, DAG);
4567 }
4568}
4569
4570SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4571 SelectionDAG &DAG) const {
4572 AArch64FunctionInfo *FuncInfo =
4573 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4574
4575 SDLoc DL(Op);
4576 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4577 getPointerTy(DAG.getDataLayout()));
4578 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4579 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4580 MachinePointerInfo(SV));
4581}
4582
4583SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
4584 SelectionDAG &DAG) const {
4585 AArch64FunctionInfo *FuncInfo =
4586 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4587
4588 SDLoc DL(Op);
4589 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
4590 ? FuncInfo->getVarArgsGPRIndex()
4591 : FuncInfo->getVarArgsStackIndex(),
4592 getPointerTy(DAG.getDataLayout()));
4593 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4594 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4595 MachinePointerInfo(SV));
4596}
4597
4598SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
4599 SelectionDAG &DAG) const {
4600 // The layout of the va_list struct is specified in the AArch64 Procedure Call
4601 // Standard, section B.3.
4602 MachineFunction &MF = DAG.getMachineFunction();
4603 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4604 auto PtrVT = getPointerTy(DAG.getDataLayout());
4605 SDLoc DL(Op);
4606
4607 SDValue Chain = Op.getOperand(0);
4608 SDValue VAList = Op.getOperand(1);
4609 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4610 SmallVector<SDValue, 4> MemOps;
4611
4612 // void *__stack at offset 0
4613 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
4614 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
4615 MachinePointerInfo(SV), /* Alignment = */ 8));
4616
4617 // void *__gr_top at offset 8
4618 int GPRSize = FuncInfo->getVarArgsGPRSize();
4619 if (GPRSize > 0) {
4620 SDValue GRTop, GRTopAddr;
4621
4622 GRTopAddr =
4623 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
4624
4625 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
4626 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
4627 DAG.getConstant(GPRSize, DL, PtrVT));
4628
4629 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
4630 MachinePointerInfo(SV, 8),
4631 /* Alignment = */ 8));
4632 }
4633
4634 // void *__vr_top at offset 16
4635 int FPRSize = FuncInfo->getVarArgsFPRSize();
4636 if (FPRSize > 0) {
4637 SDValue VRTop, VRTopAddr;
4638 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4639 DAG.getConstant(16, DL, PtrVT));
4640
4641 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
4642 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
4643 DAG.getConstant(FPRSize, DL, PtrVT));
4644
4645 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
4646 MachinePointerInfo(SV, 16),
4647 /* Alignment = */ 8));
4648 }
4649
4650 // int __gr_offs at offset 24
4651 SDValue GROffsAddr =
4652 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
4653 MemOps.push_back(DAG.getStore(
4654 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
4655 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
4656
4657 // int __vr_offs at offset 28
4658 SDValue VROffsAddr =
4659 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
4660 MemOps.push_back(DAG.getStore(
4661 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
4662 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
4663
4664 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4665}
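
A hedged sketch of the AAPCS64 va_list that LowerAAPCS_VASTART fills in; the field names and offsets come from the comments and stores above, but the struct itself is illustrative rather than a declaration used by this file:

  struct AAPCS64VaList {
    void *__stack;   // offset 0:  next stacked argument
    void *__gr_top;  // offset 8:  end of the general-register save area
    void *__vr_top;  // offset 16: end of the FP/SIMD register save area
    int __gr_offs;   // offset 24: stored as -GPRSize
    int __vr_offs;   // offset 28: stored as -FPRSize
  };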
4666
4667SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
4668 SelectionDAG &DAG) const {
4669 MachineFunction &MF = DAG.getMachineFunction();
4670
4671 if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
4672 return LowerWin64_VASTART(Op, DAG);
4673 else if (Subtarget->isTargetDarwin())
4674 return LowerDarwin_VASTART(Op, DAG);
4675 else
4676 return LowerAAPCS_VASTART(Op, DAG);
4677}
4678
4679SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
4680 SelectionDAG &DAG) const {
4681 // AAPCS has three pointers and two ints (= 32 bytes), while Darwin and
4682 // Windows have a single pointer.
4683 SDLoc DL(Op);
4684 unsigned VaListSize =
4685 Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
4686 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4687 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4688
4689 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
4690 Op.getOperand(2),
4691 DAG.getConstant(VaListSize, DL, MVT::i32),
4692 8, false, false, false, MachinePointerInfo(DestSV),
4693 MachinePointerInfo(SrcSV));
4694}
4695
4696SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
4697 assert(Subtarget->isTargetDarwin() &&
4698 "automatic va_arg instruction only works on Darwin");
4699
4700 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4701 EVT VT = Op.getValueType();
4702 SDLoc DL(Op);
4703 SDValue Chain = Op.getOperand(0);
4704 SDValue Addr = Op.getOperand(1);
4705 unsigned Align = Op.getConstantOperandVal(3);
4706 auto PtrVT = getPointerTy(DAG.getDataLayout());
4707
4708 SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
4709 Chain = VAList.getValue(1);
4710
4711 if (Align > 8) {
4712 assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
4713 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4714 DAG.getConstant(Align - 1, DL, PtrVT));
4715 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
4716 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
4717 }
4718
4719 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
4720 uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
4721
4722 // Scalar integer and FP values smaller than 64 bits are implicitly extended
4723 // up to 64 bits. At the very least, we have to increase the striding of the
4724 // vaargs list to match this, and for FP values we need to introduce
4725 // FP_ROUND nodes as well.
4726 if (VT.isInteger() && !VT.isVector())
4727 ArgSize = 8;
4728 bool NeedFPTrunc = false;
4729 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
4730 ArgSize = 8;
4731 NeedFPTrunc = true;
4732 }
4733
4734 // Increment the pointer, VAList, to the next vaarg
4735 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4736 DAG.getConstant(ArgSize, DL, PtrVT));
4737 // Store the incremented VAList to the legalized pointer
4738 SDValue APStore =
4739 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
4740
4741 // Load the actual argument out of the pointer VAList
4742 if (NeedFPTrunc) {
4743 // Load the value as an f64.
4744 SDValue WideFP =
4745 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
4746 // Round the value down to an f32.
4747 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
4748 DAG.getIntPtrConstant(1, DL));
4749 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
4750 // Merge the rounded value with the chain output of the load.
4751 return DAG.getMergeValues(Ops, DL);
4752 }
4753
4754 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
4755}
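
A hedged illustration of a varargs consumer on Darwin, where the va_list is the single pointer loaded above and each scalar integer va_arg advances it by the ArgSize = 8 stride:

  #include <cstdarg>
  int sum_ints(int n, ...) {
    va_list ap;
    va_start(ap, n);
    int total = 0;
    for (int i = 0; i < n; ++i)
      total += va_arg(ap, int);  // 8-byte stride per argument on AArch64 Darwin
    va_end(ap);
    return total;
  }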
4756
4757SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
4758 SelectionDAG &DAG) const {
4759 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4760 MFI.setFrameAddressIsTaken(true);
4761
4762 EVT VT = Op.getValueType();
4763 SDLoc DL(Op);
4764 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4765 SDValue FrameAddr =
4766 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
4767 while (Depth--)
4768 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
4769 MachinePointerInfo());
4770 return FrameAddr;
4771}
4772
4773// FIXME? Maybe this could be a TableGen attribute on some registers and
4774// this table could be generated automatically from RegInfo.
4775unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
4776 SelectionDAG &DAG) const {
4777 unsigned Reg = StringSwitch<unsigned>(RegName)
4778 .Case("sp", AArch64::SP)
4779 .Case("x18", AArch64::X18)
4780 .Case("w18", AArch64::W18)
4781 .Default(0);
4782 if ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
4783 !Subtarget->isX18Reserved())
4784 Reg = 0;
4785 if (Reg)
4786 return Reg;
4787 report_fatal_error(Twine("Invalid register name \""
4788 + StringRef(RegName) + "\"."));
4789}
4790
4791SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
4792 SelectionDAG &DAG) const {
4793 MachineFunction &MF = DAG.getMachineFunction();
4794 MachineFrameInfo &MFI = MF.getFrameInfo();
4795 MFI.setReturnAddressIsTaken(true);
4796
4797 EVT VT = Op.getValueType();
4798 SDLoc DL(Op);
4799 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4800 if (Depth) {
4801 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4802 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
4803 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
4804 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
4805 MachinePointerInfo());
4806 }
4807
4808 // Return LR, which contains the return address. Mark it an implicit live-in.
4809 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
4810 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
4811}
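
A hedged illustration: these builtins produce the FRAMEADDR and RETURNADDR nodes handled by LowerFRAMEADDR and LowerRETURNADDR above (depth 0 copies FP or marks LR live-in; nonzero depths walk the saved frame chain):

  void *current_frame(void)      { return __builtin_frame_address(0); }
  void *caller_return_addr(void) { return __builtin_return_address(0); }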
4812
4813 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
4814 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
4815SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
4816 SelectionDAG &DAG) const {
4817 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4818 EVT VT = Op.getValueType();
4819 unsigned VTBits = VT.getSizeInBits();
4820 SDLoc dl(Op);
4821 SDValue ShOpLo = Op.getOperand(0);
4822 SDValue ShOpHi = Op.getOperand(1);
4823 SDValue ShAmt = Op.getOperand(2);
4824 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4825
4826 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4827
4828 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4829 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4830 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4831
4832 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
4833 // is "undef". We wanted 0, so CSEL it directly.
4834 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4835 ISD::SETEQ, dl, DAG);
4836 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4837 HiBitsForLo =
4838 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4839 HiBitsForLo, CCVal, Cmp);
4840
4841 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4842 DAG.getConstant(VTBits, dl, MVT::i64));
4843
4844 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4845 SDValue LoForNormalShift =
4846 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
4847
4848 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4849 dl, DAG);
4850 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4851 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4852 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4853 LoForNormalShift, CCVal, Cmp);
4854
4855 // AArch64 shifts larger than the register width are wrapped rather than
4856 // clamped, so we can't just emit "hi >> x".
4857 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4858 SDValue HiForBigShift =
4859 Opc == ISD::SRA
4860 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4861 DAG.getConstant(VTBits - 1, dl, MVT::i64))
4862 : DAG.getConstant(0, dl, VT);
4863 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4864 HiForNormalShift, CCVal, Cmp);
4865
4866 SDValue Ops[2] = { Lo, Hi };
4867 return DAG.getMergeValues(Ops, dl);
4868}
4869
4870 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4871 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
4872SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
4873 SelectionDAG &DAG) const {
4874 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4875 EVT VT = Op.getValueType();
4876 unsigned VTBits = VT.getSizeInBits();
4877 SDLoc dl(Op);
4878 SDValue ShOpLo = Op.getOperand(0);
4879 SDValue ShOpHi = Op.getOperand(1);
4880 SDValue ShAmt = Op.getOperand(2);
4881
4882 assert(Op.getOpcode() == ISD::SHL_PARTS);
4883 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4884 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4885 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4886
4887 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
4888 // is "undef". We wanted 0, so CSEL it directly.
4889 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4890 ISD::SETEQ, dl, DAG);
4891 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4892 LoBitsForHi =
4893 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4894 LoBitsForHi, CCVal, Cmp);
4895
4896 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4897 DAG.getConstant(VTBits, dl, MVT::i64));
4898 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4899 SDValue HiForNormalShift =
4900 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
4901
4902 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4903
4904 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4905 dl, DAG);
4906 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4907 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4908 HiForNormalShift, CCVal, Cmp);
4909
4910 // AArch64 shifts larger than the register width are wrapped rather than
4911 // clamped, so we can't just emit "lo << a" if a is too big.
4912 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
4913 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4914 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4915 LoForNormalShift, CCVal, Cmp);
4916
4917 SDValue Ops[2] = { Lo, Hi };
4918 return DAG.getMergeValues(Ops, dl);
4919}
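For reference, here is the same decomposition that LowerShiftLeftParts builds out of DAG nodes, written as a minimal standalone sketch over plain 64-bit integers; the helper name ShiftLeft128 is made up for illustration and it assumes 0 <= Amt < 128.

#include <cstdint>
#include <utility>

// Returns {low half, high half} of (Hi:Lo << Amt) for a 128-bit value.
static std::pair<uint64_t, uint64_t> ShiftLeft128(uint64_t Lo, uint64_t Hi,
                                                  unsigned Amt) {
  if (Amt >= 64)                   // "big shift": the low half becomes zero and
    return {0, Lo << (Amt - 64)};  // the high half comes from Lo alone.
  if (Amt == 0)                    // "Lo >> (64 - Amt)" would shift by 64 (UB),
    return {Lo, Hi};               // which is exactly what the CSEL above avoids.
  return {Lo << Amt, (Hi << Amt) | (Lo >> (64 - Amt))};
}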
4920
4921bool AArch64TargetLowering::isOffsetFoldingLegal(
4922 const GlobalAddressSDNode *GA) const {
4923 DEBUG(dbgs() << "Skipping offset folding global address: ");
4924 DEBUG(GA->dump());
4925 DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
4926 "addresses\n");
4927 return false;
4928}
4929
4930bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4931 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
4932 // FIXME: We should be able to handle f128 as well with a clever lowering.
4933 if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
4934 DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
4935 return true;
4936 }
4937
4938 StringRef FPType;
4939 bool IsLegal = false;
4940 SmallString<128> ImmStrVal;
4941 Imm.toString(ImmStrVal);
4942
4943 if (VT == MVT::f64) {
4944 FPType = "f64";
4945 IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
4946 } else if (VT == MVT::f32) {
4947 FPType = "f32";
4948 IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
4949 } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
4950 FPType = "f16";
4951 IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
4952 }
4953
4954 if (IsLegal) {
4955 DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n");
4956 return true;
4957 }
4958
4959 if (!FPType.empty())
4960 DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n");
4961 else
4962 DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n");
4963
4964 return false;
4965}
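As a rough illustration of what the AArch64_AM::getFP*Imm helpers test, the following standalone sketch (a hypothetical helper, not the LLVM one) brute-forces the usual FMOV (immediate) range of +/-(16 + m)/16 * 2^e with m in [0, 15] and e in [-3, 4]; zero is excluded because it is handled above via the zero register.

#include <cmath>

static bool isFMOVImmEncodable(double V) {
  if (V == 0.0 || !std::isfinite(V))
    return false;
  double A = std::fabs(V);
  // 8 exponents * 16 fractions = 128 candidate magnitudes, all exact doubles.
  for (int E = -3; E <= 4; ++E)
    for (int M = 0; M <= 15; ++M)
      if (A == std::ldexp((16.0 + M) / 16.0, E))
        return true;
  return false;
}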
4966
4967//===----------------------------------------------------------------------===//
4968// AArch64 Optimization Hooks
4969//===----------------------------------------------------------------------===//
4970
4971static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
4972 SDValue Operand, SelectionDAG &DAG,
4973 int &ExtraSteps) {
4974 EVT VT = Operand.getValueType();
4975 if (ST->hasNEON() &&
4976 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
4977 VT == MVT::f32 || VT == MVT::v1f32 ||
4978 VT == MVT::v2f32 || VT == MVT::v4f32)) {
4979 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
4980 // For the reciprocal estimates, convergence is quadratic, so the number
4981 // of digits is doubled after each iteration. In ARMv8, the accuracy of
4982 // the initial estimate is 2^-8. Thus the number of extra steps to refine
4983 // the result for float (23 mantissa bits) is 2 and for double (52
4984 // mantissa bits) is 3.
4985 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
4986
4987 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
4988 }
4989
4990 return SDValue();
4991}
4992
4993SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
4994 SelectionDAG &DAG, int Enabled,
4995 int &ExtraSteps,
4996 bool &UseOneConst,
4997 bool Reciprocal) const {
4998 if (Enabled == ReciprocalEstimate::Enabled ||
4999 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5000 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5001 DAG, ExtraSteps)) {
5002 SDLoc DL(Operand);
5003 EVT VT = Operand.getValueType();
5004
5005 SDNodeFlags Flags;
5006 Flags.setUnsafeAlgebra(true);
5007
5008 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
5009 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
5010 for (int i = ExtraSteps; i > 0; --i) {
5011 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
5012 Flags);
5013 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
5014 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5015 }
5016
5017 if (!Reciprocal) {
5018 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5019 VT);
5020 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5021 SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5022
5023 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5024 // Correct the result if the operand is 0.0.
5025 Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5026 VT, Eq, Operand, Estimate);
5027 }
5028
5029 ExtraSteps = 0;
5030 return Estimate;
5031 }
5032
5033 return SDValue();
5034}
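The refinement loop above is ordinary Newton-Raphson for 1/sqrt(x); a scalar sketch of the same steps follows (the initial estimate would come from FRSQRTE with roughly 2^-8 accuracy, here it is just a parameter):

static float refineRSqrt(float X, float Estimate, int ExtraSteps) {
  for (int i = 0; i < ExtraSteps; ++i) {
    // FRSQRTS computes (3 - a*b) / 2; the surrounding FMULs supply a and b.
    float Step = 0.5f * (3.0f - X * Estimate * Estimate);
    Estimate *= Step;
  }
  return Estimate; // ~ 1/sqrt(X); multiply by X afterwards to get sqrt(X).
}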
5035
5036SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5037 SelectionDAG &DAG, int Enabled,
5038 int &ExtraSteps) const {
5039 if (Enabled == ReciprocalEstimate::Enabled)
5040 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5041 DAG, ExtraSteps)) {
5042 SDLoc DL(Operand);
5043 EVT VT = Operand.getValueType();
5044
5045 SDNodeFlags Flags;
5046 Flags.setUnsafeAlgebra(true);
5047
5048 // Newton reciprocal iteration: E * (2 - X * E)
5049 // AArch64 reciprocal iteration instruction: (2 - M * N)
5050 for (int i = ExtraSteps; i > 0; --i) {
5051 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5052 Estimate, Flags);
5053 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5054 }
5055
5056 ExtraSteps = 0;
5057 return Estimate;
5058 }
5059
5060 return SDValue();
5061}
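Likewise, the reciprocal refinement is the classic E = E * (2 - X * E) Newton step, with FRECPS supplying the (2 - M * N) part; a scalar sketch:

static float refineRecip(float X, float Estimate, int ExtraSteps) {
  for (int i = 0; i < ExtraSteps; ++i)
    Estimate *= (2.0f - X * Estimate); // FRECPS step followed by FMUL
  return Estimate;                     // ~ 1/X
}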
5062
5063//===----------------------------------------------------------------------===//
5064// AArch64 Inline Assembly Support
5065//===----------------------------------------------------------------------===//
5066
5067// Table of Constraints
5068// TODO: This is the current set of constraints supported by ARM for the
5069// compiler; not all of them may make sense, e.g. S may be difficult to support.
5070//
5071// r - A general register
5072// w - An FP/SIMD register of some size in the range v0-v31
5073// x - An FP/SIMD register of some size in the range v0-v15
5074// I - Constant that can be used with an ADD instruction
5075// J - Constant that can be used with a SUB instruction
5076// K - Constant that can be used with a 32-bit logical instruction
5077// L - Constant that can be used with a 64-bit logical instruction
5078// M - Constant that can be used as a 32-bit MOV immediate
5079// N - Constant that can be used as a 64-bit MOV immediate
5080// Q - A memory reference with base register and no offset
5081// S - A symbolic address
5082// Y - Floating point constant zero
5083// Z - Integer constant zero
5084//
5085// Note that general register operands will be output using their 64-bit x
5086// register name, whatever the size of the variable, unless the asm operand
5087// is prefixed by the %w modifier. Floating-point and SIMD register operands
5088// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5089// %q modifier.
5090const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5091 // At this point, we have to lower this constraint to something else, so we
5092 // lower it to an "r" or "w". However, by doing this we will force the result
5093 // to be in register, while the X constraint is much more permissive.
5094 //
5095 // Although we are correct (we are free to emit anything, without
5096 // constraints), we might break use cases that would expect us to be more
5097 // efficient and emit something else.
5098 if (!Subtarget->hasFPARMv8())
5099 return "r";
5100
5101 if (ConstraintVT.isFloatingPoint())
5102 return "w";
5103
5104 if (ConstraintVT.isVector() &&
5105 (ConstraintVT.getSizeInBits() == 64 ||
5106 ConstraintVT.getSizeInBits() == 128))
5107 return "w";
5108
5109 return "r";
5110}
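To see the 'r'/'w' constraints and the register-name modifiers from the table above in use, here is a small illustrative C snippet (not from this file; any AArch64-targeting Clang or GCC should accept it):

static double fadd_via_asm(double a, double b) {
  double r;
  // "w" asks for an FP/SIMD register; the %d modifier prints it as d<N>.
  asm("fadd %d0, %d1, %d2" : "=w"(r) : "w"(a), "w"(b));
  return r;
}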
5111
5112/// getConstraintType - Given a constraint letter, return the type of
5113/// constraint it is for this target.
5114AArch64TargetLowering::ConstraintType
5115AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5116 if (Constraint.size() == 1) {
5117 switch (Constraint[0]) {
5118 default:
5119 break;
5120 case 'z':
5121 return C_Other;
5122 case 'x':
5123 case 'w':
5124 return C_RegisterClass;
5125 // An address with a single base register. Due to the way we
5126 // currently handle addresses it is the same as 'r'.
5127 case 'Q':
5128 return C_Memory;
5129 }
5130 }
5131 return TargetLowering::getConstraintType(Constraint);
5132}
5133
5134/// Examine constraint type and operand type and determine a weight value.
5135/// This object must already have been set up with the operand type
5136/// and the current alternative constraint selected.
5137TargetLowering::ConstraintWeight
5138AArch64TargetLowering::getSingleConstraintMatchWeight(
5139 AsmOperandInfo &info, const char *constraint) const {
5140 ConstraintWeight weight = CW_Invalid;
5141 Value *CallOperandVal = info.CallOperandVal;
5142 // If we don't have a value, we can't do a match,
5143 // but allow it at the lowest weight.
5144 if (!CallOperandVal)
5145 return CW_Default;
5146 Type *type = CallOperandVal->getType();
5147 // Look at the constraint type.
5148 switch (*constraint) {
5149 default:
5150 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5151 break;
5152 case 'x':
5153 case 'w':
5154 if (type->isFloatingPointTy() || type->isVectorTy())
5155 weight = CW_Register;
5156 break;
5157 case 'z':
5158 weight = CW_Constant;
5159 break;
5160 }
5161 return weight;
5162}
5163
5164std::pair<unsigned, const TargetRegisterClass *>
5165AArch64TargetLowering::getRegForInlineAsmConstraint(
5166 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5167 if (Constraint.size() == 1) {
5168 switch (Constraint[0]) {
5169 case 'r':
5170 if (VT.getSizeInBits() == 64)
5171 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5172 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5173 case 'w':
5174 if (VT.getSizeInBits() == 16)
5175 return std::make_pair(0U, &AArch64::FPR16RegClass);
5176 if (VT.getSizeInBits() == 32)
5177 return std::make_pair(0U, &AArch64::FPR32RegClass);
5178 if (VT.getSizeInBits() == 64)
5179 return std::make_pair(0U, &AArch64::FPR64RegClass);
5180 if (VT.getSizeInBits() == 128)
5181 return std::make_pair(0U, &AArch64::FPR128RegClass);
5182 break;
5183 // The instructions that this constraint is designed for can
5184 // only take 128-bit registers so just use that regclass.
5185 case 'x':
5186 if (VT.getSizeInBits() == 128)
5187 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5188 break;
5189 }
5190 }
5191 if (StringRef("{cc}").equals_lower(Constraint))
5192 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5193
5194 // Use the default implementation in TargetLowering to convert the register
5195 // constraint into a member of a register class.
5196 std::pair<unsigned, const TargetRegisterClass *> Res;
5197 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5198
5199 // Not found as a standard register?
5200 if (!Res.second) {
5201 unsigned Size = Constraint.size();
5202 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5203 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5204 int RegNo;
5205 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5206 if (!Failed && RegNo >= 0 && RegNo <= 31) {
5207 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5208 // By default we'll emit v0-v31 for this unless there's a modifier where
5209 // we'll emit the correct register as well.
5210 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5211 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5212 Res.second = &AArch64::FPR64RegClass;
5213 } else {
5214 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5215 Res.second = &AArch64::FPR128RegClass;
5216 }
5217 }
5218 }
5219 }
5220
5221 return Res;
5222}
5223
5224/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5225/// vector. If it is invalid, don't add anything to Ops.
5226void AArch64TargetLowering::LowerAsmOperandForConstraint(
5227 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5228 SelectionDAG &DAG) const {
5229 SDValue Result;
5230
5231 // Currently only support length 1 constraints.
5232 if (Constraint.length() != 1)
5233 return;
5234
5235 char ConstraintLetter = Constraint[0];
5236 switch (ConstraintLetter) {
5237 default:
5238 break;
5239
5240 // This set of constraints deals with valid constants for various instructions.
5241 // Validate and return a target constant for them if we can.
5242 case 'z': {
5243 // 'z' maps to xzr or wzr so it needs an input of 0.
5244 if (!isNullConstant(Op))
5245 return;
5246
5247 if (Op.getValueType() == MVT::i64)
5248 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5249 else
5250 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5251 break;
5252 }
5253
5254 case 'I':
5255 case 'J':
5256 case 'K':
5257 case 'L':
5258 case 'M':
5259 case 'N':
5260 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5261 if (!C)
5262 return;
5263
5264 // Grab the value and do some validation.
5265 uint64_t CVal = C->getZExtValue();
5266 switch (ConstraintLetter) {
5267 // The I constraint applies only to simple ADD or SUB immediate operands:
5268 // i.e. 0 to 4095 with optional shift by 12
5269 // The J constraint applies only to ADD or SUB immediates that would be
5270 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5271 // instruction [or vice versa], in other words -1 to -4095 with optional
5272 // left shift by 12.
5273 case 'I':
5274 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5275 break;
5276 return;
5277 case 'J': {
5278 uint64_t NVal = -C->getSExtValue();
5279 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5280 CVal = C->getSExtValue();
5281 break;
5282 }
5283 return;
5284 }
5285 // The K and L constraints apply *only* to logical immediates, including
5286 // what used to be the MOVI alias for ORR (though the MOVI alias has now
5287 // been removed and MOV should be used). So these constraints have to
5288 // distinguish between bit patterns that are valid 32-bit or 64-bit
5289 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5290 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5291 // versa.
5292 case 'K':
5293 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5294 break;
5295 return;
5296 case 'L':
5297 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5298 break;
5299 return;
5300 // The M and N constraints are a superset of K and L respectively, for use
5301 // with the MOV (immediate) alias. As well as the logical immediates they
5302 // also match 32 or 64-bit immediates that can be loaded either using a
5303 // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5304 // (M) or 64-bit 0x1234000000000000 (N) etc.
5305 // As a note some of this code is liberally stolen from the asm parser.
5306 case 'M': {
5307 if (!isUInt<32>(CVal))
5308 return;
5309 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5310 break;
5311 if ((CVal & 0xFFFF) == CVal)
5312 break;
5313 if ((CVal & 0xFFFF0000ULL) == CVal)
5314 break;
5315 uint64_t NCVal = ~(uint32_t)CVal;
5316 if ((NCVal & 0xFFFFULL) == NCVal)
5317 break;
5318 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5319 break;
5320 return;
5321 }
5322 case 'N': {
5323 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5324 break;
5325 if ((CVal & 0xFFFFULL) == CVal)
5326 break;
5327 if ((CVal & 0xFFFF0000ULL) == CVal)
5328 break;
5329 if ((CVal & 0xFFFF00000000ULL) == CVal)
5330 break;
5331 if ((CVal & 0xFFFF000000000000ULL) == CVal)
5332 break;
5333 uint64_t NCVal = ~CVal;
5334 if ((NCVal & 0xFFFFULL) == NCVal)
5335 break;
5336 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5337 break;
5338 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5339 break;
5340 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5341 break;
5342 return;
5343 }
5344 default:
5345 return;
5346 }
5347
5348 // All assembler immediates are 64-bit integers.
5349 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5350 break;
5351 }
5352
5353 if (Result.getNode()) {
5354 Ops.push_back(Result);
5355 return;
5356 }
5357
5358 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5359}
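Illustrative uses of the immediate constraints validated above (hypothetical examples, not taken from a test; the constants are chosen to satisfy the 'I' and 'K' rules described in the comments):

static long add_4095(long x) {
  long r;
  // 'I': an ADD/SUB immediate in [0, 4095], optionally shifted left by 12.
  asm("add %0, %1, %2" : "=r"(r) : "r"(x), "I"(4095L));
  return r;
}

static unsigned mask_low_bytes(unsigned x) {
  unsigned r;
  // 'K': a valid 32-bit logical ("bitmask") immediate such as 0x00ff00ff.
  asm("and %w0, %w1, %2" : "=r"(r) : "r"(x), "K"(0x00ff00ffU));
  return r;
}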
5360
5361//===----------------------------------------------------------------------===//
5362// AArch64 Advanced SIMD Support
5363//===----------------------------------------------------------------------===//
5364
5365/// WidenVector - Given a value in the V64 register class, produce the
5366/// equivalent value in the V128 register class.
5367static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5368 EVT VT = V64Reg.getValueType();
5369 unsigned NarrowSize = VT.getVectorNumElements();
5370 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5371 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5372 SDLoc DL(V64Reg);
5373
5374 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5375 V64Reg, DAG.getConstant(0, DL, MVT::i32));
5376}
5377
5378/// getExtFactor - Determine the adjustment factor for the position when
5379/// generating an "extract from vector registers" instruction.
5380static unsigned getExtFactor(SDValue &V) {
5381 EVT EltType = V.getValueType().getVectorElementType();
5382 return EltType.getSizeInBits() / 8;
5383}
5384
5385/// NarrowVector - Given a value in the V128 register class, produce the
5386/// equivalent value in the V64 register class.
5387static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5388 EVT VT = V128Reg.getValueType();
5389 unsigned WideSize = VT.getVectorNumElements();
5390 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5391 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5392 SDLoc DL(V128Reg);
5393
5394 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5395}
5396
5397// Gather data to see if the operation can be modelled as a
5398// shuffle in combination with VEXTs.
5399SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5400 SelectionDAG &DAG) const {
5401 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5402 DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5403 SDLoc dl(Op);
5404 EVT VT = Op.getValueType();
5405 unsigned NumElts = VT.getVectorNumElements();
5406
5407 struct ShuffleSourceInfo {
5408 SDValue Vec;
5409 unsigned MinElt;
5410 unsigned MaxElt;
5411
5412 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5413 // be compatible with the shuffle we intend to construct. As a result
5414 // ShuffleVec will be some sliding window into the original Vec.
5415 SDValue ShuffleVec;
5416
5417 // Code should guarantee that element i in Vec starts at element "WindowBase
5418 // + i * WindowScale in ShuffleVec".
5419 int WindowBase;
5420 int WindowScale;
5421
5422 ShuffleSourceInfo(SDValue Vec)
5423 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5424 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5425
5426 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5427 };
5428
5429 // First gather all vectors used as an immediate source for this BUILD_VECTOR
5430 // node.
5431 SmallVector<ShuffleSourceInfo, 2> Sources;
5432 for (unsigned i = 0; i < NumElts; ++i) {
5433 SDValue V = Op.getOperand(i);
5434 if (V.isUndef())
5435 continue;
5436 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5437 !isa<ConstantSDNode>(V.getOperand(1))) {
5438 DEBUG(dbgs() << "Reshuffle failed: "
5439 "a shuffle can only come from building a vector from "
5440 "various elements of other vectors, provided their "
5441 "indices are constant\n");
5442 return SDValue();
5443 }
5444
5445 // Add this element source to the list if it's not already there.
5446 SDValue SourceVec = V.getOperand(0);
5447 auto Source = find(Sources, SourceVec);
5448 if (Source == Sources.end())
5449 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5450
5451 // Update the minimum and maximum lane number seen.
5452 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5453 Source->MinElt = std::min(Source->MinElt, EltNo);
5454 Source->MaxElt = std::max(Source->MaxElt, EltNo);
5455 }
5456
5457 if (Sources.size() > 2) {
5458 DEBUG(dbgs() << "Reshuffle failed: currently only do something sane when at "
5459 "most two source vectors are involved\n");
5460 return SDValue();
5461 }
5462
5463 // Find out the smallest element size among result and two sources, and use
5464 // it as element size to build the shuffle_vector.
5465 EVT SmallestEltTy = VT.getVectorElementType();
5466 for (auto &Source : Sources) {
5467 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5468 if (SrcEltTy.bitsLT(SmallestEltTy)) {
5469 SmallestEltTy = SrcEltTy;
5470 }
5471 }
5472 unsigned ResMultiplier =
5473 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5474 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5475 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5476
5477 // If the source vector is too wide or too narrow, we may nevertheless be able
5478 // to construct a compatible shuffle either by concatenating it with UNDEF or
5479 // extracting a suitable range of elements.
5480 for (auto &Src : Sources) {
5481 EVT SrcVT = Src.ShuffleVec.getValueType();
5482
5483 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
5484 continue;
5485
5486 // This stage of the search produces a source with the same element type as
5487 // the original, but with a total width matching the BUILD_VECTOR output.
5488 EVT EltVT = SrcVT.getVectorElementType();
5489 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
5490 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
5491
5492 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
5493 assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
5494 // We can pad out the smaller vector for free, so if it's part of a
5495 // shuffle...
5496 Src.ShuffleVec =
5497 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
5498 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
5499 continue;
5500 }
5501
5502 assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
5503
5504 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
5505 DEBUG(dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
5506 return SDValue();
5507 }
5508
5509 if (Src.MinElt >= NumSrcElts) {
5510 // The extraction can just take the second half
5511 Src.ShuffleVec =
5512 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5513 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5514 Src.WindowBase = -NumSrcElts;
5515 } else if (Src.MaxElt < NumSrcElts) {
5516 // The extraction can just take the first half
5517 Src.ShuffleVec =
5518 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5519 DAG.getConstant(0, dl, MVT::i64));
5520 } else {
5521 // An actual VEXT is needed
5522 SDValue VEXTSrc1 =
5523 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5524 DAG.getConstant(0, dl, MVT::i64));
5525 SDValue VEXTSrc2 =
5526 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5527 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5528 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
5529
5530 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
5531 VEXTSrc2,
5532 DAG.getConstant(Imm, dl, MVT::i32));
5533 Src.WindowBase = -Src.MinElt;
5534 }
5535 }
5536
5537 // Another possible incompatibility occurs from the vector element types. We
5538 // can fix this by bitcasting the source vectors to the same type we intend
5539 // for the shuffle.
5540 for (auto &Src : Sources) {
5541 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
5542 if (SrcEltTy == SmallestEltTy)
5543 continue;
5544 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
5545 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
5546 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
5547 Src.WindowBase *= Src.WindowScale;
5548 }
5549
5550 // Final sanity check before we try to actually produce a shuffle.
5551 DEBUG(
5552 for (auto Src : Sources)
5553 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
5554 );
5555
5556 // The stars all align, our next step is to produce the mask for the shuffle.
5557 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
5558 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
5559 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
5560 SDValue Entry = Op.getOperand(i);
5561 if (Entry.isUndef())
5562 continue;
5563
5564 auto Src = find(Sources, Entry.getOperand(0));
5565 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
5566
5567 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
5568 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
5569 // segment.
5570 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
5571 int BitsDefined =
5572 std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
5573 int LanesDefined = BitsDefined / BitsPerShuffleLane;
5574
5575 // This source is expected to fill ResMultiplier lanes of the final shuffle,
5576 // starting at the appropriate offset.
5577 int *LaneMask = &Mask[i * ResMultiplier];
5578
5579 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
5580 ExtractBase += NumElts * (Src - Sources.begin());
5581 for (int j = 0; j < LanesDefined; ++j)
5582 LaneMask[j] = ExtractBase + j;
5583 }
5584
5585 // Final check before we try to produce nonsense...
5586 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
5587 DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
5588 return SDValue();
5589 }
5590
5591 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
5592 for (unsigned i = 0; i < Sources.size(); ++i)
5593 ShuffleOps[i] = Sources[i].ShuffleVec;
5594
5595 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
5596 ShuffleOps[1], Mask);
5597 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
5598
5599 DEBUG(
5600 dbgs() << "Reshuffle, creating node: ";
5601 Shuffle.dump();
5602 dbgs() << "Reshuffle, creating node: ";
5603 V.dump();
5604 );
5605
5606 return V;
5607}
5608
5609// check if an EXT instruction can handle the shuffle mask when the
5610// vector sources of the shuffle are the same.
5611static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5612 unsigned NumElts = VT.getVectorNumElements();
5613
5614 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5615 if (M[0] < 0)
5616 return false;
5617
5618 Imm = M[0];
5619
5620 // If this is a VEXT shuffle, the immediate value is the index of the first
5621 // element. The other shuffle indices must be the successive elements after
5622 // the first one.
5623 unsigned ExpectedElt = Imm;
5624 for (unsigned i = 1; i < NumElts; ++i) {
5625 // Increment the expected index. If it wraps around, just follow it
5626 // back to index zero and keep going.
5627 ++ExpectedElt;
5628 if (ExpectedElt == NumElts)
5629 ExpectedElt = 0;
5630
5631 if (M[i] < 0)
5632 continue; // ignore UNDEF indices
5633 if (ExpectedElt != static_cast<unsigned>(M[i]))
5634 return false;
5635 }
5636
5637 return true;
5638}
5639
5640// check if an EXT instruction can handle the shuffle mask when the
5641// vector sources of the shuffle are different.
5642static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
5643 unsigned &Imm) {
5644 // Look for the first non-undef element.
5645 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
5646
5647 // Benefit from APInt to handle overflow when calculating the expected element.
5648 unsigned NumElts = VT.getVectorNumElements();
5649 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
5650 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
5651 // The following shuffle indices must be the successive elements after the
5652 // first real element.
5653 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
5654 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
5655 if (FirstWrongElt != M.end())
5656 return false;
5657
5658 // The index of an EXT is the first element if it is not UNDEF.
5659 // Watch out for the beginning UNDEFs. The EXT index should be the expected
5660 // value of the first element. E.g.
5661 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
5662 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
5663 // ExpectedElt is the last mask index plus 1.
5664 Imm = ExpectedElt.getZExtValue();
5665
5666 // There are two different cases that require reversing the input vectors.
5667 // For example, for vector <4 x i32> we have the following cases,
5668 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
5669 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
5670 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
5671 // reversing the two input vectors.
5672 if (Imm < NumElts)
5673 ReverseEXT = true;
5674 else
5675 Imm -= NumElts;
5676
5677 return true;
5678}
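Worked examples for the two predicates above, for NumElts = 4 (indices 4..7 refer to the second source), plus a standalone sketch of the rotation property the singleton case checks; both the example results and the helper are illustrative, not part of this file.

//   isSingletonEXTMask(<1, 2, 3, 0>)  -> true, Imm = 1
//   isEXTMask(<3, 4, 5, 6>)           -> true, Imm = 3, ReverseEXT left false
//   isEXTMask(<-1, -1, 7, 0>)         -> true, Imm = 1, ReverseEXT = true
// The singleton case just asks whether the mask is a rotation of 0..N-1,
// with UNDEF (-1) entries ignored:
#include <vector>

static bool isRotationMask(const std::vector<int> &M, unsigned NumElts,
                           unsigned &Start) {
  if (M.empty() || M[0] < 0)
    return false;
  Start = M[0];
  for (unsigned i = 1; i < M.size(); ++i)
    if (M[i] >= 0 && static_cast<unsigned>(M[i]) != (Start + i) % NumElts)
      return false;
  return true;
}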
5679
5680/// isREVMask - Check if a vector shuffle corresponds to a REV
5681/// instruction with the specified blocksize. (The order of the elements
5682/// within each block of the vector is reversed.)
5683static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5684 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
5685 "Only possible block sizes for REV are: 16, 32, 64");
5686
5687 unsigned EltSz = VT.getScalarSizeInBits();
5688 if (EltSz == 64)
5689 return false;
5690
5691 unsigned NumElts = VT.getVectorNumElements();
5692 unsigned BlockElts = M[0] + 1;
5693 // If the first shuffle index is UNDEF, be optimistic.
5694 if (M[0] < 0)
5695 BlockElts = BlockSize / EltSz;
5696
5697 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5698 return false;
5699
5700 for (unsigned i = 0; i < NumElts; ++i) {
5701 if (M[i] < 0)
5702 continue; // ignore UNDEF indices
5703 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
5704 return false;
5705 }
5706
5707 return true;
5708}
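For concreteness, the masks isREVMask accepts are block-wise reversals: v8i8 with BlockSize 32 gives <3, 2, 1, 0, 7, 6, 5, 4>, and v4i16 with BlockSize 64 gives <3, 2, 1, 0>. A small sketch (not from this file) that builds the canonical mask the predicate compares against:

#include <vector>

static std::vector<int> makeREVMask(unsigned NumElts, unsigned EltSzBits,
                                    unsigned BlockSizeBits) {
  unsigned BlockElts = BlockSizeBits / EltSzBits;
  std::vector<int> M(NumElts);
  for (unsigned i = 0; i < NumElts; ++i)
    // Same index arithmetic as the check above: reverse within each block.
    M[i] = (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts);
  return M;
}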
5709
5710static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5711 unsigned NumElts = VT.getVectorNumElements();
5712 WhichResult = (M[0] == 0 ? 0 : 1);
5713 unsigned Idx = WhichResult * NumElts / 2;
5714 for (unsigned i = 0; i != NumElts; i += 2) {
5715 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5716 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
5717 return false;
5718 Idx += 1;
5719 }
5720
5721 return true;
5722}
5723
5724static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5725 unsigned NumElts = VT.getVectorNumElements();
5726 WhichResult = (M[0] == 0 ? 0 : 1);
5727 for (unsigned i = 0; i != NumElts; ++i) {
5728 if (M[i] < 0)
5729 continue; // ignore UNDEF indices
5730 if ((unsigned)M[i] != 2 * i + WhichResult)
5731 return false;
5732 }
5733
5734 return true;
5735}
5736
5737static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5738 unsigned NumElts = VT.getVectorNumElements();
5739 WhichResult = (M[0] == 0 ? 0 : 1);
5740 for (unsigned i = 0; i < NumElts; i += 2) {
5741 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5742 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
5743 return false;
5744 }
5745 return true;
5746}
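The three predicates above recognise the standard permute masks; for NumElts = 4 (indices 4..7 meaning the second source) they are ZIP1 <0, 4, 1, 5>, ZIP2 <2, 6, 3, 7>, UZP1 <0, 2, 4, 6>, UZP2 <1, 3, 5, 7>, TRN1 <0, 4, 2, 6> and TRN2 <1, 5, 3, 7>. A small sketch (illustrative only) that generates them with the same index arithmetic:

#include <vector>

// Kind: 'Z' = ZIP, 'U' = UZP, anything else = TRN; WhichResult: 0 or 1.
static std::vector<int> makePermMask(char Kind, unsigned NumElts,
                                     unsigned WhichResult) {
  std::vector<int> M(NumElts);
  for (unsigned i = 0; i < NumElts; ++i) {
    if (Kind == 'Z')        // interleave one half of each source
      M[i] = WhichResult * NumElts / 2 + i / 2 + (i % 2) * NumElts;
    else if (Kind == 'U')   // take every other element
      M[i] = 2 * i + WhichResult;
    else                    // transpose adjacent element pairs
      M[i] = (i & ~1u) + WhichResult + (i % 2) * NumElts;
  }
  return M;
}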
5747
5748/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
5749/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5750/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5751static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5752 unsigned NumElts = VT.getVectorNumElements();
5753 WhichResult = (M[0] == 0 ? 0 : 1);
5754 unsigned Idx = WhichResult * NumElts / 2;
5755 for (unsigned i = 0; i != NumElts; i += 2) {
5756 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5757 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
5758 return false;
5759 Idx += 1;
5760 }
5761
5762 return true;
5763}
5764
5765/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
5766/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5767/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5768static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5769 unsigned Half = VT.getVectorNumElements() / 2;
5770 WhichResult = (M[0] == 0 ? 0 : 1);
5771 for (unsigned j = 0; j != 2; ++j) {
5772 unsigned Idx = WhichResult;
5773 for (unsigned i = 0; i != Half; ++i) {
5774 int MIdx = M[i + j * Half];
5775 if (MIdx >= 0 && (unsigned)MIdx != Idx)
5776 return false;
5777 Idx += 2;
5778 }
5779 }
5780
5781 return true;
5782}
5783
5784/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
5785/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5786/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5787static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5788 unsigned NumElts = VT.getVectorNumElements();
5789 WhichResult = (M[0] == 0 ? 0 : 1);
5790 for (unsigned i = 0; i < NumElts; i += 2) {
5791 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5792 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
5793 return false;
5794 }
5795 return true;
5796}
5797
5798static bool isINSMask(ArrayRef<int> M, int NumInputElements,
5799 bool &DstIsLeft, int &Anomaly) {
5800 if (M.size() != static_cast<size_t>(NumInputElements))
5801 return false;
5802
5803 int NumLHSMatch = 0, NumRHSMatch = 0;
5804 int LastLHSMismatch = -1, LastRHSMismatch = -1;
5805
5806 for (int i = 0; i < NumInputElements; ++i) {
5807 if (M[i] == -1) {
5808 ++NumLHSMatch;
5809 ++NumRHSMatch;
5810 continue;
5811 }
5812
5813 if (M[i] == i)
5814 ++NumLHSMatch;
5815 else
5816 LastLHSMismatch = i;
5817
5818 if (M[i] == i + NumInputElements)
5819 ++NumRHSMatch;
5820 else
5821 LastRHSMismatch = i;
5822 }
5823
5824 if (NumLHSMatch == NumInputElements - 1) {
5825 DstIsLeft = true;
5826 Anomaly = LastLHSMismatch;
5827 return true;
5828 } else if (NumRHSMatch == NumInputElements - 1) {
5829 DstIsLeft = false;
5830 Anomaly = LastRHSMismatch;
5831 return true;
5832 }
5833
5834 return false;
5835}
5836
5837static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
5838 if (VT.getSizeInBits() != 128)
5839 return false;
5840
5841 unsigned NumElts = VT.getVectorNumElements();
5842
5843 for (int I = 0, E = NumElts / 2; I != E; I++) {
5844 if (Mask[I] != I)
5845 return false;
5846 }
5847
5848 int Offset = NumElts / 2;
5849 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
5850 if (Mask[I] != I + SplitLHS * Offset)
5851 return false;
5852 }
5853
5854 return true;
5855}
5856
5857static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
5858 SDLoc DL(Op);
5859 EVT VT = Op.getValueType();
5860 SDValue V0 = Op.getOperand(0);
5861 SDValue V1 = Op.getOperand(1);
5862 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5863
5864 if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
5865 VT.getVectorElementType() != V1.getValueType().getVectorElementType())
5866 return SDValue();
5867
5868 bool SplitV0 = V0.getValueSizeInBits() == 128;
5869
5870 if (!isConcatMask(Mask, VT, SplitV0))
5871 return SDValue();
5872
5873 EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
5874 VT.getVectorNumElements() / 2);
5875 if (SplitV0) {
5876 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
5877 DAG.getConstant(0, DL, MVT::i64));
5878 }
5879 if (V1.getValueSizeInBits() == 128) {
5880 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
5881 DAG.getConstant(0, DL, MVT::i64));
5882 }
5883 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
5884}
5885
5886/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5887/// the specified operations to build the shuffle.
5888static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5889 SDValue RHS, SelectionDAG &DAG,
5890 const SDLoc &dl) {
5891 unsigned OpNum = (PFEntry >> 26) & 0x0F;
5892 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
5893 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
5894
5895 enum {
5896 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5897 OP_VREV,
5898 OP_VDUP0,
5899 OP_VDUP1,
5900 OP_VDUP2,
5901 OP_VDUP3,
5902 OP_VEXT1,
5903 OP_VEXT2,
5904 OP_VEXT3,
5905 OP_VUZPL, // VUZP, left result
5906 OP_VUZPR, // VUZP, right result
5907 OP_VZIPL, // VZIP, left result
5908 OP_VZIPR, // VZIP, right result
5909 OP_VTRNL, // VTRN, left result
5910 OP_VTRNR // VTRN, right result
5911 };
5912
5913 if (OpNum == OP_COPY) {
5914 if (LHSID == (1 * 9 + 2) * 9 + 3)
5915 return LHS;
5916 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
5917 return RHS;
5918 }
5919
5920 SDValue OpLHS, OpRHS;
5921 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5922 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5923 EVT VT = OpLHS.getValueType();
5924
5925 switch (OpNum) {
5926 default:
5927 llvm_unreachable("Unknown shuffle opcode!")::llvm::llvm_unreachable_internal("Unknown shuffle opcode!", "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 5927)
;
5928 case OP_VREV:
5929 // VREV divides the vector in half and swaps within the half.
5930 if (VT.getVectorElementType() == MVT::i32 ||
5931 VT.getVectorElementType() == MVT::f32)
5932 return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
5933 // vrev <4 x i16> -> REV32
5934 if (VT.getVectorElementType() == MVT::i16 ||
5935 VT.getVectorElementType() == MVT::f16)
5936 return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
5937 // vrev <4 x i8> -> REV16
5938 assert(VT.getVectorElementType() == MVT::i8);
5939 return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
5940 case OP_VDUP0:
5941 case OP_VDUP1:
5942 case OP_VDUP2:
5943 case OP_VDUP3: {
5944 EVT EltTy = VT.getVectorElementType();
5945 unsigned Opcode;
5946 if (EltTy == MVT::i8)
5947 Opcode = AArch64ISD::DUPLANE8;
5948 else if (EltTy == MVT::i16 || EltTy == MVT::f16)
5949 Opcode = AArch64ISD::DUPLANE16;
5950 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
5951 Opcode = AArch64ISD::DUPLANE32;
5952 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
5953 Opcode = AArch64ISD::DUPLANE64;
5954 else
5955 llvm_unreachable("Invalid vector element type?")::llvm::llvm_unreachable_internal("Invalid vector element type?"
, "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 5955)
;
5956
5957 if (VT.getSizeInBits() == 64)
5958 OpLHS = WidenVector(OpLHS, DAG);
5959 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
5960 return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
5961 }
5962 case OP_VEXT1:
5963 case OP_VEXT2:
5964 case OP_VEXT3: {
5965 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
5966 return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
5967 DAG.getConstant(Imm, dl, MVT::i32));
5968 }
5969 case OP_VUZPL:
5970 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
5971 OpRHS);
5972 case OP_VUZPR:
5973 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
5974 OpRHS);
5975 case OP_VZIPL:
5976 return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
5977 OpRHS);
5978 case OP_VZIPR:
5979 return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
5980 OpRHS);
5981 case OP_VTRNL:
5982 return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
5983 OpRHS);
5984 case OP_VTRNR:
5985 return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
5986 OpRHS);
5987 }
5988}
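Putting the shifts and masks above together, each 32-bit PerfectShuffleTable entry appears to be packed as [31:30] cost, [29:26] opcode, [25:13] LHS id, [12:0] RHS id (field widths inferred from this function and from the PFEntry >> 30 cost check in its caller); a decoding sketch, with made-up names:

struct PFFields {
  unsigned Cost, OpNum, LHSID, RHSID;
};

static PFFields decodePFEntry(unsigned PFEntry) {
  PFFields F;
  F.Cost  = PFEntry >> 30;                      // instructions needed
  F.OpNum = (PFEntry >> 26) & 0x0F;             // OP_COPY, OP_VREV, OP_VDUPn, ...
  F.LHSID = (PFEntry >> 13) & ((1u << 13) - 1); // recursive table index (LHS)
  F.RHSID = (PFEntry >> 0) & ((1u << 13) - 1);  // recursive table index (RHS)
  return F;
}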
5989
5990static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
5991 SelectionDAG &DAG) {
5992 // Check to see if we can use the TBL instruction.
5993 SDValue V1 = Op.getOperand(0);
5994 SDValue V2 = Op.getOperand(1);
5995 SDLoc DL(Op);
5996
5997 EVT EltVT = Op.getValueType().getVectorElementType();
5998 unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
5999
6000 SmallVector<SDValue, 8> TBLMask;
6001 for (int Val : ShuffleMask) {
6002 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
6003 unsigned Offset = Byte + Val * BytesPerElt;
6004 TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
6005 }
6006 }
6007
6008 MVT IndexVT = MVT::v8i8;
6009 unsigned IndexLen = 8;
6010 if (Op.getValueSizeInBits() == 128) {
6011 IndexVT = MVT::v16i8;
6012 IndexLen = 16;
6013 }
6014
6015 SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
6016 SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
6017
6018 SDValue Shuffle;
6019 if (V2.getNode()->isUndef()) {
6020 if (IndexLen == 8)
6021 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
6022 Shuffle = DAG.getNode(
6023 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6024 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
6025 DAG.getBuildVector(IndexVT, DL,
6026 makeArrayRef(TBLMask.data(), IndexLen)));
6027 } else {
6028 if (IndexLen == 8) {
6029 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
6030 Shuffle = DAG.getNode(
6031 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6032 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
6033 DAG.getBuildVector(IndexVT, DL,
6034 makeArrayRef(TBLMask.data(), IndexLen)));
6035 } else {
6036 // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
6037 // cannot currently represent the register constraints on the input
6038 // table registers.
6039 // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
6040 // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
6041 // IndexLen));
6042 Shuffle = DAG.getNode(
6043 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6044 DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
6045 V2Cst, DAG.getBuildVector(IndexVT, DL,
6046 makeArrayRef(TBLMask.data(), IndexLen)));
6047 }
6048 }
6049 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
6050}
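The TBL path above rewrites the element-level shuffle mask as byte indices into the concatenated table register(s); a standalone sketch of that expansion (out-of-range indices, e.g. from UNDEF lanes, make TBL write 0 for that byte):

#include <vector>

static std::vector<unsigned> expandToTBLBytes(const std::vector<int> &Mask,
                                              unsigned BytesPerElt) {
  std::vector<unsigned> Bytes;
  for (int Val : Mask)
    for (unsigned B = 0; B < BytesPerElt; ++B)
      Bytes.push_back(Val * BytesPerElt + B); // byte B of source element Val
  return Bytes;
}
// e.g. a v4i16 mask <0, 3, 5, 2> becomes byte indices <0,1, 6,7, 10,11, 4,5>.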
6051
6052static unsigned getDUPLANEOp(EVT EltType) {
6053 if (EltType == MVT::i8)
6054 return AArch64ISD::DUPLANE8;
6055 if (EltType == MVT::i16 || EltType == MVT::f16)
6056 return AArch64ISD::DUPLANE16;
6057 if (EltType == MVT::i32 || EltType == MVT::f32)
6058 return AArch64ISD::DUPLANE32;
6059 if (EltType == MVT::i64 || EltType == MVT::f64)
6060 return AArch64ISD::DUPLANE64;
6061
6062 llvm_unreachable("Invalid vector element type?")::llvm::llvm_unreachable_internal("Invalid vector element type?"
, "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 6062)
;
6063}
6064
6065SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
6066 SelectionDAG &DAG) const {
6067 SDLoc dl(Op);
6068 EVT VT = Op.getValueType();
6069
6070 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6071
6072 // Convert shuffles that are directly supported on NEON to target-specific
6073 // DAG nodes, instead of keeping them as shuffles and matching them again
6074 // during code selection. This is more efficient and avoids the possibility
6075 // of inconsistencies between legalization and selection.
6076 ArrayRef<int> ShuffleMask = SVN->getMask();
6077
6078 SDValue V1 = Op.getOperand(0);
6079 SDValue V2 = Op.getOperand(1);
6080
6081 if (SVN->isSplat()) {
6082 int Lane = SVN->getSplatIndex();
6083 // If this is undef splat, generate it via "just" vdup, if possible.
6084 if (Lane == -1)
6085 Lane = 0;
6086
6087 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
6088 return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
6089 V1.getOperand(0));
6090 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
6091 // constant. If so, we can just reference the lane's definition directly.
6092 if (V1.getOpcode() == ISD::BUILD_VECTOR &&
6093 !isa<ConstantSDNode>(V1.getOperand(Lane)))
6094 return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
6095
6096 // Otherwise, duplicate from the lane of the input vector.
6097 unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
6098
6099 // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
6100 // to make a vector of the same size as this SHUFFLE. We can ignore the
6101 // extract entirely, and canonicalise the concat using WidenVector.
6102 if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
6103 Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
6104 V1 = V1.getOperand(0);
6105 } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
6106 unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
6107 Lane -= Idx * VT.getVectorNumElements() / 2;
6108 V1 = WidenVector(V1.getOperand(Idx), DAG);
6109 } else if (VT.getSizeInBits() == 64)
6110 V1 = WidenVector(V1, DAG);
6111
6112 return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
6113 }
6114
6115 if (isREVMask(ShuffleMask, VT, 64))
6116 return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
6117 if (isREVMask(ShuffleMask, VT, 32))
6118 return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
6119 if (isREVMask(ShuffleMask, VT, 16))
6120 return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
6121
6122 bool ReverseEXT = false;
6123 unsigned Imm;
6124 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
6125 if (ReverseEXT)
6126 std::swap(V1, V2);
6127 Imm *= getExtFactor(V1);
6128 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
6129 DAG.getConstant(Imm, dl, MVT::i32));
6130 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
6131 Imm *= getExtFactor(V1);
6132 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
6133 DAG.getConstant(Imm, dl, MVT::i32));
6134 }
6135
6136 unsigned WhichResult;
6137 if (isZIPMask(ShuffleMask, VT, WhichResult)) {
6138 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
6139 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6140 }
6141 if (isUZPMask(ShuffleMask, VT, WhichResult)) {
6142 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
6143 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6144 }
6145 if (isTRNMask(ShuffleMask, VT, WhichResult)) {
6146 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
6147 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6148 }
6149
6150 if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6151 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
6152 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6153 }
6154 if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6155 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
6156 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6157 }
6158 if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6159 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
6160 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6161 }
6162
6163 if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
6164 return Concat;
6165
6166 bool DstIsLeft;
6167 int Anomaly;
6168 int NumInputElements = V1.getValueType().getVectorNumElements();
6169 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
6170 SDValue DstVec = DstIsLeft ? V1 : V2;
6171 SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
6172
6173 SDValue SrcVec = V1;
6174 int SrcLane = ShuffleMask[Anomaly];
6175 if (SrcLane >= NumInputElements) {
6176 SrcVec = V2;
6177 SrcLane -= VT.getVectorNumElements();
6178 }
6179 SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
6180
6181 EVT ScalarVT = VT.getVectorElementType();
6182
6183 if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
6184 ScalarVT = MVT::i32;
6185
6186 return DAG.getNode(
6187 ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
6188 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
6189 DstLaneV);
6190 }
6191
6192 // If the shuffle is not directly supported and it has 4 elements, use
6193 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6194 unsigned NumElts = VT.getVectorNumElements();
6195 if (NumElts == 4) {
6196 unsigned PFIndexes[4];
6197 for (unsigned i = 0; i != 4; ++i) {
6198 if (ShuffleMask[i] < 0)
6199 PFIndexes[i] = 8;
6200 else
6201 PFIndexes[i] = ShuffleMask[i];
6202 }
6203
6204 // Compute the index in the perfect shuffle table.
6205 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
6206 PFIndexes[2] * 9 + PFIndexes[3];
6207 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6208 unsigned Cost = (PFEntry >> 30);
6209
6210 if (Cost <= 4)
6211 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6212 }
6213
6214 return GenerateTBL(Op, ShuffleMask, DAG);
6215}
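For reference, the perfect-shuffle path above packs a 4-lane mask into a base-9 index (8 standing in for an undef lane) and keeps the cost in the top two bits of the table entry. A minimal standalone sketch of that packing, with a hypothetical helper name and with PerfectShuffleTable's encoding taken as given:

static unsigned pfIndexForMask(const int Mask[4]) { // hypothetical helper, not in the listing
  unsigned Idx[4];
  for (unsigned i = 0; i != 4; ++i)
    Idx[i] = Mask[i] < 0 ? 8u : unsigned(Mask[i]);  // 8 encodes an undef lane
  // Base-9 packing, matching PFIndexes[0]*9*9*9 + ... in the code above.
  return Idx[0] * 9 * 9 * 9 + Idx[1] * 9 * 9 + Idx[2] * 9 + Idx[3];
}
// For the ZIP1-style mask <0, 4, 1, 5> this gives 0*729 + 4*81 + 1*9 + 5 = 338;
// GeneratePerfectShuffle is only used when the stored cost (PFEntry >> 30) is <= 4.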
6216
6217static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
6218 APInt &UndefBits) {
6219 EVT VT = BVN->getValueType(0);
6220 APInt SplatBits, SplatUndef;
6221 unsigned SplatBitSize;
6222 bool HasAnyUndefs;
6223 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6224 unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
6225
6226 for (unsigned i = 0; i < NumSplats; ++i) {
6227 CnstBits <<= SplatBitSize;
6228 UndefBits <<= SplatBitSize;
6229 CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
6230 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
6231 }
6232
6233 return true;
6234 }
6235
6236 return false;
6237}
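To make the replication loop in resolveBuildVector concrete: for a v4i32 build_vector splatting 0x000000FF, isConstantSplat reports a 32-bit splat, so NumSplats is 4 and CnstBits ends up with that value in every 32-bit slice. A small sketch of the same accumulation with llvm::APInt, using illustrative values:

APInt Cnst(128, 0);                 // accumulator, VT.getSizeInBits() wide
APInt Splat(32, 0xFF);              // the reported splat value
for (unsigned i = 0; i < 4; ++i) {  // NumSplats = 128 / 32
  Cnst <<= 32;
  Cnst |= Splat.zextOrTrunc(128);
}
// Cnst is now 0x000000FF000000FF000000FF000000FF.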
6238
6239SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
6240 SelectionDAG &DAG) const {
6241 BuildVectorSDNode *BVN =
6242 dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
6243 SDValue LHS = Op.getOperand(0);
6244 SDLoc dl(Op);
6245 EVT VT = Op.getValueType();
6246
6247 if (!BVN)
6248 return Op;
6249
6250 APInt CnstBits(VT.getSizeInBits(), 0);
6251 APInt UndefBits(VT.getSizeInBits(), 0);
6252 if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6253 // We only have BIC vector immediate instruction, which is and-not.
6254 CnstBits = ~CnstBits;
6255
6256 // We make use of a little bit of goto ickiness in order to avoid having to
6257 // duplicate the immediate matching logic for the undef toggled case.
6258 bool SecondTry = false;
6259 AttemptModImm:
6260
6261 if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6262 CnstBits = CnstBits.zextOrTrunc(64);
6263 uint64_t CnstVal = CnstBits.getZExtValue();
6264
6265 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6266 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6267 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6268 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6269 DAG.getConstant(CnstVal, dl, MVT::i32),
6270 DAG.getConstant(0, dl, MVT::i32));
6271 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6272 }
6273
6274 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6275 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6276 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6277 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6278 DAG.getConstant(CnstVal, dl, MVT::i32),
6279 DAG.getConstant(8, dl, MVT::i32));
6280 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6281 }
6282
6283 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6284 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6285 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6286 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6287 DAG.getConstant(CnstVal, dl, MVT::i32),
6288 DAG.getConstant(16, dl, MVT::i32));
6289 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6290 }
6291
6292 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6293 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6294 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6295 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6296 DAG.getConstant(CnstVal, dl, MVT::i32),
6297 DAG.getConstant(24, dl, MVT::i32));
6298 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6299 }
6300
6301 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6302 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6303 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6304 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6305 DAG.getConstant(CnstVal, dl, MVT::i32),
6306 DAG.getConstant(0, dl, MVT::i32));
6307 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6308 }
6309
6310 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6311 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6312 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6313 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6314 DAG.getConstant(CnstVal, dl, MVT::i32),
6315 DAG.getConstant(8, dl, MVT::i32));
6316 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6317 }
6318 }
6319
6320 if (SecondTry)
6321 goto FailedModImm;
6322 SecondTry = true;
6323 CnstBits = ~UndefBits;
6324 goto AttemptModImm;
6325 }
6326
6327// We can always fall back to a non-immediate AND.
6328FailedModImm:
6329 return Op;
6330}
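A worked instance of the BIC matching above, with an illustrative constant: a v4i32 AND against the splat mask 0xFFFFFF00 inverts to 0x000000FF per lane, which fits the 8-bit, LSL #0 modified-immediate form, so the AND is rewritten as a BICi and NVCAST'd back to the original type:

//   and v4i32 LHS, <0xFFFFFF00 x 4>
//     CnstBits = ~constant      -> 0x000000FF in every 32-bit lane
//     zextOrTrunc(64)           -> 0x000000FF000000FF (hi and lo halves match)
//     isAdvSIMDModImmType1      -> true, encoded immediate 0xFF, shift #0
//     result                    -> NVCAST v4i32 (BICi LHS, #0xFF, LSL #0)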
6331
6332// Specialized code to quickly find if PotentialBVec is a BuildVector that
6333 // consists only of the same constant int value, which is returned in the
6334 // reference argument ConstVal.
6335static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
6336 uint64_t &ConstVal) {
6337 BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
6338 if (!Bvec)
6339 return false;
6340 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
6341 if (!FirstElt)
6342 return false;
6343 EVT VT = Bvec->getValueType(0);
6344 unsigned NumElts = VT.getVectorNumElements();
6345 for (unsigned i = 1; i < NumElts; ++i)
6346 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
6347 return false;
6348 ConstVal = FirstElt->getZExtValue();
6349 return true;
6350}
6351
6352static unsigned getIntrinsicID(const SDNode *N) {
6353 unsigned Opcode = N->getOpcode();
6354 switch (Opcode) {
6355 default:
6356 return Intrinsic::not_intrinsic;
6357 case ISD::INTRINSIC_WO_CHAIN: {
6358 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
6359 if (IID < Intrinsic::num_intrinsics)
6360 return IID;
6361 return Intrinsic::not_intrinsic;
6362 }
6363 }
6364}
6365
6366// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
6367// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
6368 // BUILD_VECTOR with constant element C1, C2 is a constant, and C1 == ~C2.
6369// Also, logical shift right -> sri, with the same structure.
6370static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
6371 EVT VT = N->getValueType(0);
6372
6373 if (!VT.isVector())
6374 return SDValue();
6375
6376 SDLoc DL(N);
6377
6378 // Is the first op an AND?
6379 const SDValue And = N->getOperand(0);
6380 if (And.getOpcode() != ISD::AND)
6381 return SDValue();
6382
6383 // Is the second op an shl or lshr?
6384 SDValue Shift = N->getOperand(1);
6385 // This will have been turned into: AArch64ISD::VSHL vector, #shift
6386 // or AArch64ISD::VLSHR vector, #shift
6387 unsigned ShiftOpc = Shift.getOpcode();
6388 if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR))
6389 return SDValue();
6390 bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR;
6391
6392 // Is the shift amount constant?
6393 ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
6394 if (!C2node)
6395 return SDValue();
6396
6397 // Is the and mask vector all constant?
6398 uint64_t C1;
6399 if (!isAllConstantBuildVector(And.getOperand(1), C1))
6400 return SDValue();
6401
6402 // Is C1 == ~C2, taking into account how much one can shift elements of a
6403 // particular size?
6404 uint64_t C2 = C2node->getZExtValue();
6405 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
6406 if (C2 > ElemSizeInBits)
6407 return SDValue();
6408 uint64_t ElemMask = ElemSizeInBits == 64 ? ~0ULL : (1ULL << ElemSizeInBits) - 1; // avoid shifting by the full element width
6409 if ((C1 & ElemMask) != (~C2 & ElemMask))
6410 return SDValue();
6411
6412 SDValue X = And.getOperand(0);
6413 SDValue Y = Shift.getOperand(0);
6414
6415 unsigned Intrin =
6416 IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
6417 SDValue ResultSLI =
6418 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
6419 DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
6420 Shift.getOperand(1));
6421
6422 DEBUG(dbgs() << "aarch64-lower: transformed: \n");
6423 DEBUG(N->dump(&DAG));
6424 DEBUG(dbgs() << "into: \n");
6425 DEBUG(ResultSLI->dump(&DAG));
6426
6427 ++NumShiftInserts;
6428 return ResultSLI;
6429}
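A compact trace of the structural match tryLowerToSLI performs; this follows the code's C1 == ~C2 convention literally, and the whole transform is gated behind EnableAArch64SlrGeneration:

//   N  = (or (and X, BvecC1), (AArch64ISD::VSHL Y, #C2))       ; or VLSHR
//   C1 = splatted element of BvecC1, C2 = constant shift amount
//   accept iff C2 <= ElemSizeInBits and (C1 & ElemMask) == (~C2 & ElemMask)
//   emit   aarch64_neon_vsli(X, Y, #C2)   (vsri when the shift is VLSHR)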
6430
6431SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
6432 SelectionDAG &DAG) const {
6433 // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
6434 if (EnableAArch64SlrGeneration) {
6435 if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
6436 return Res;
6437 }
6438
6439 BuildVectorSDNode *BVN =
6440 dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
6441 SDValue LHS = Op.getOperand(1);
6442 SDLoc dl(Op);
6443 EVT VT = Op.getValueType();
6444
6445 // OR commutes, so try swapping the operands.
6446 if (!BVN) {
6447 LHS = Op.getOperand(0);
6448 BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
6449 }
6450 if (!BVN)
6451 return Op;
6452
6453 APInt CnstBits(VT.getSizeInBits(), 0);
6454 APInt UndefBits(VT.getSizeInBits(), 0);
6455 if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6456 // We make use of a little bit of goto ickiness in order to avoid having to
6457 // duplicate the immediate matching logic for the undef toggled case.
6458 bool SecondTry = false;
6459 AttemptModImm:
6460
6461 if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6462 CnstBits = CnstBits.zextOrTrunc(64);
6463 uint64_t CnstVal = CnstBits.getZExtValue();
6464
6465 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6466 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6467 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6468 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6469 DAG.getConstant(CnstVal, dl, MVT::i32),
6470 DAG.getConstant(0, dl, MVT::i32));
6471 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6472 }
6473
6474 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6475 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6476 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6477 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6478 DAG.getConstant(CnstVal, dl, MVT::i32),
6479 DAG.getConstant(8, dl, MVT::i32));
6480 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6481 }
6482
6483 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6484 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6485 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6486 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6487 DAG.getConstant(CnstVal, dl, MVT::i32),
6488 DAG.getConstant(16, dl, MVT::i32));
6489 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6490 }
6491
6492 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6493 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6494 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6495 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6496 DAG.getConstant(CnstVal, dl, MVT::i32),
6497 DAG.getConstant(24, dl, MVT::i32));
6498 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6499 }
6500
6501 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6502 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6503 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6504 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6505 DAG.getConstant(CnstVal, dl, MVT::i32),
6506 DAG.getConstant(0, dl, MVT::i32));
6507 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6508 }
6509
6510 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6511 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6512 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6513 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6514 DAG.getConstant(CnstVal, dl, MVT::i32),
6515 DAG.getConstant(8, dl, MVT::i32));
6516 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6517 }
6518 }
6519
6520 if (SecondTry)
6521 goto FailedModImm;
6522 SecondTry = true;
6523 CnstBits = UndefBits;
6524 goto AttemptModImm;
6525 }
6526
6527// We can always fall back to a non-immediate OR.
6528FailedModImm:
6529 return Op;
6530}
6531
6532 // Normalize the operands of BUILD_VECTOR. The value of constant operands will
6533 // be truncated to fit the element width.
6534static SDValue NormalizeBuildVector(SDValue Op,
6535 SelectionDAG &DAG) {
6536 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6537 SDLoc dl(Op);
6538 EVT VT = Op.getValueType();
6539 EVT EltTy = VT.getVectorElementType();
6540
6541 if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
6542 return Op;
6543
6544 SmallVector<SDValue, 16> Ops;
6545 for (SDValue Lane : Op->ops()) {
6546 if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
6547 APInt LowBits(EltTy.getSizeInBits(),
6548 CstLane->getZExtValue());
6549 Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
6550 }
6551 Ops.push_back(Lane);
6552 }
6553 return DAG.getBuildVector(VT, dl, Ops);
6554}
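An example of the per-lane truncation in NormalizeBuildVector: for a v8i8 build_vector whose lanes were widened to i32 by type legalization, only the low 8 bits of each constant survive, so a lane holding 0x1FF is rebuilt as the i32 constant 0xFF. A one-line sketch with llvm::APInt, using illustrative values:

APInt LowBits(/*numBits=*/8, /*val=*/0x1FF);   // constructor keeps only the low 8 bits: 0xFF
uint64_t Rebuilt = LowBits.getZExtValue();     // 0xFF, re-emitted as an MVT::i32 constant lane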
6555
6556SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
6557 SelectionDAG &DAG) const {
6558 SDLoc dl(Op);
6559 EVT VT = Op.getValueType();
6560 Op = NormalizeBuildVector(Op, DAG);
6561 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6562
6563 APInt CnstBits(VT.getSizeInBits(), 0);
6564 APInt UndefBits(VT.getSizeInBits(), 0);
6565 if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6566 // We make use of a little bit of goto ickiness in order to avoid having to
6567 // duplicate the immediate matching logic for the undef toggled case.
6568 bool SecondTry = false;
6569 AttemptModImm:
6570
6571 if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6572 CnstBits = CnstBits.zextOrTrunc(64);
6573 uint64_t CnstVal = CnstBits.getZExtValue();
6574
6575 // Certain magic vector constants (used to express things like NOT
6576 // and NEG) are passed through unmodified. This allows codegen patterns
6577 // for these operations to match. Special-purpose patterns will lower
6578 // these immediates to MOVIs if it proves necessary.
6579 if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
6580 return Op;
6581
6582 // The many faces of MOVI...
6583 if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) {
6584 CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
6585 if (VT.getSizeInBits() == 128) {
6586 SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
6587 DAG.getConstant(CnstVal, dl, MVT::i32));
6588 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6589 }
6590
6591 // Support the V64 version via subregister insertion.
6592 SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
6593 DAG.getConstant(CnstVal, dl, MVT::i32));
6594 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6595 }
6596
6597 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6598 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6599 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6600 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6601 DAG.getConstant(CnstVal, dl, MVT::i32),
6602 DAG.getConstant(0, dl, MVT::i32));
6603 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6604 }
6605
6606 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6607 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6608 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6609 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6610 DAG.getConstant(CnstVal, dl, MVT::i32),
6611 DAG.getConstant(8, dl, MVT::i32));
6612 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6613 }
6614
6615 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6616 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6617 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6618 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6619 DAG.getConstant(CnstVal, dl, MVT::i32),
6620 DAG.getConstant(16, dl, MVT::i32));
6621 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6622 }
6623
6624 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6625 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6626 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6627 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6628 DAG.getConstant(CnstVal, dl, MVT::i32),
6629 DAG.getConstant(24, dl, MVT::i32));
6630 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6631 }
6632
6633 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6634 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6635 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6636 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6637 DAG.getConstant(CnstVal, dl, MVT::i32),
6638 DAG.getConstant(0, dl, MVT::i32));
6639 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6640 }
6641
6642 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6643 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6644 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6645 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6646 DAG.getConstant(CnstVal, dl, MVT::i32),
6647 DAG.getConstant(8, dl, MVT::i32));
6648 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6649 }
6650
6651 if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
6652 CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
6653 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6654 SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
6655 DAG.getConstant(CnstVal, dl, MVT::i32),
6656 DAG.getConstant(264, dl, MVT::i32));
6657 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6658 }
6659
6660 if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
6661 CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
6662 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6663 SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
6664 DAG.getConstant(CnstVal, dl, MVT::i32),
6665 DAG.getConstant(272, dl, MVT::i32));
6666 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6667 }
6668
6669 if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
6670 CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
6671 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
6672 SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
6673 DAG.getConstant(CnstVal, dl, MVT::i32));
6674 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6675 }
6676
6677 // The few faces of FMOV...
6678 if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) {
6679 CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
6680 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
6681 SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
6682 DAG.getConstant(CnstVal, dl, MVT::i32));
6683 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6684 }
6685
6686 if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
6687 VT.getSizeInBits() == 128) {
6688 CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
6689 SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
6690 DAG.getConstant(CnstVal, dl, MVT::i32));
6691 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6692 }
6693
6694 // The many faces of MVNI...
6695 CnstVal = ~CnstVal;
6696 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6697 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6698 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6699 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6700 DAG.getConstant(CnstVal, dl, MVT::i32),
6701 DAG.getConstant(0, dl, MVT::i32));
6702 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6703 }
6704
6705 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6706 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6707 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6708 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6709 DAG.getConstant(CnstVal, dl, MVT::i32),
6710 DAG.getConstant(8, dl, MVT::i32));
6711 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6712 }
6713
6714 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6715 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6716 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6717 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6718 DAG.getConstant(CnstVal, dl, MVT::i32),
6719 DAG.getConstant(16, dl, MVT::i32));
6720 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6721 }
6722
6723 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6724 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6725 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6726 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6727 DAG.getConstant(CnstVal, dl, MVT::i32),
6728 DAG.getConstant(24, dl, MVT::i32));
6729 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6730 }
6731
6732 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6733 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6734 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6735 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6736 DAG.getConstant(CnstVal, dl, MVT::i32),
6737 DAG.getConstant(0, dl, MVT::i32));
6738 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6739 }
6740
6741 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6742 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6743 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6744 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6745 DAG.getConstant(CnstVal, dl, MVT::i32),
6746 DAG.getConstant(8, dl, MVT::i32));
6747 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6748 }
6749
6750 if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
6751 CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
6752 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6753 SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
6754 DAG.getConstant(CnstVal, dl, MVT::i32),
6755 DAG.getConstant(264, dl, MVT::i32));
6756 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6757 }
6758
6759 if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
6760 CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
6761 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6762 SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
6763 DAG.getConstant(CnstVal, dl, MVT::i32),
6764 DAG.getConstant(272, dl, MVT::i32));
6765 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6766 }
6767 }
6768
6769 if (SecondTry)
6770 goto FailedModImm;
6771 SecondTry = true;
6772 CnstBits = UndefBits;
6773 goto AttemptModImm;
6774 }
6775FailedModImm:
6776
6777 // Scan through the operands to find some interesting properties we can
6778 // exploit:
6779 // 1) If only one value is used, we can use a DUP, or
6780 // 2) if only the low element is not undef, we can just insert that, or
6781 // 3) if only one constant value is used (w/ some non-constant lanes),
6782 // we can splat the constant value into the whole vector then fill
6783 // in the non-constant lanes.
6784 // 4) FIXME: If different constant values are used, but we can intelligently
6785 // select the values we'll be overwriting for the non-constant
6786 // lanes such that we can directly materialize the vector
6787 // some other way (MOVI, e.g.), we can be sneaky.
6788 unsigned NumElts = VT.getVectorNumElements();
6789 bool isOnlyLowElement = true;
6790 bool usesOnlyOneValue = true;
6791 bool usesOnlyOneConstantValue = true;
6792 bool isConstant = true;
6793 unsigned NumConstantLanes = 0;
6794 SDValue Value;
6795 SDValue ConstantValue;
6796 for (unsigned i = 0; i < NumElts; ++i) {
6797 SDValue V = Op.getOperand(i);
6798 if (V.isUndef())
6799 continue;
6800 if (i > 0)
6801 isOnlyLowElement = false;
6802 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6803 isConstant = false;
6804
6805 if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
6806 ++NumConstantLanes;
6807 if (!ConstantValue.getNode())
6808 ConstantValue = V;
6809 else if (ConstantValue != V)
6810 usesOnlyOneConstantValue = false;
6811 }
6812
6813 if (!Value.getNode())
6814 Value = V;
6815 else if (V != Value)
6816 usesOnlyOneValue = false;
6817 }
6818
6819 if (!Value.getNode()) {
6820 DEBUG(dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
6821 return DAG.getUNDEF(VT);
6822 }
6823
6824 if (isOnlyLowElement) {
6825 DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
6826 "SCALAR_TO_VECTOR node\n");
6827 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6828 }
6829
6830 // Use DUP for non-constant splats. For f32 constant splats, reduce to
6831 // i32 and try again.
6832 if (usesOnlyOneValue) {
6833 if (!isConstant) {
6834 if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6835 Value.getValueType() != VT) {
6836 DEBUG(dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
6837 return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
6838 }
6839
6840 // This is actually a DUPLANExx operation, which keeps everything vectory.
6841
6842 SDValue Lane = Value.getOperand(1);
6843 Value = Value.getOperand(0);
6844 if (Value.getValueSizeInBits() == 64) {
6845 DEBUG(dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
6846 "widening it\n");
6847 Value = WidenVector(Value, DAG);
6848 }
6849
6850 unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
6851 return DAG.getNode(Opcode, dl, VT, Value, Lane);
6852 }
6853
6854 if (VT.getVectorElementType().isFloatingPoint()) {
6855 SmallVector<SDValue, 8> Ops;
6856 EVT EltTy = VT.getVectorElementType();
6857 assert((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
6858 "Unsupported floating-point vector type");
6859 DEBUG(dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
6860 "BITCASTS, and try again\n");
6861 MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
6862 for (unsigned i = 0; i < NumElts; ++i)
6863 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
6864 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
6865 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6866 DEBUG(
6867 dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
6868 Val.dump();
6869 );
6870 Val = LowerBUILD_VECTOR(Val, DAG);
6871 if (Val.getNode())
6872 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6873 }
6874 }
6875
6876 // If only one constant value was used, and it appears in more than one lane,
6877 // start by splatting that value, then replace the non-constant lanes. This
6878 // is better than the default, which will perform a separate initialization
6879 // for each lane.
6880 if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
6881 SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
6882 // Now insert the non-constant lanes.
6883 for (unsigned i = 0; i < NumElts; ++i) {
6884 SDValue V = Op.getOperand(i);
6885 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
6886 if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
6887 // Note that type legalization likely mucked about with the VT of the
6888 // source operand, so we may have to convert it here before inserting.
6889 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
6890 }
6891 }
6892 return Val;
6893 }
6894
6895 // This will generate a load from the constant pool.
6896 if (isConstant) {
6897 DEBUG(dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
6898 "expansion\n");
6899 return SDValue();
6900 }
6901
6902 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6903 if (NumElts >= 4) {
6904 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
6905 return shuffle;
6906 }
6907
6908 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6909 // know the default expansion would otherwise fall back on something even
6910 // worse. For a vector with one or two non-undef values, that's
6911 // scalar_to_vector for the elements followed by a shuffle (provided the
6912 // shuffle is valid for the target) and materialization element by element
6913 // on the stack followed by a load for everything else.
6914 if (!isConstant && !usesOnlyOneValue) {
6915 DEBUG(dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
6916 "of INSERT_VECTOR_ELT\n");
6917
6918 SDValue Vec = DAG.getUNDEF(VT);
6919 SDValue Op0 = Op.getOperand(0);
6920 unsigned i = 0;
6921
6922 // Use SCALAR_TO_VECTOR for lane zero to
6923 // a) Avoid a RMW dependency on the full vector register, and
6924 // b) Allow the register coalescer to fold away the copy if the
6925 // value is already in an S or D register, and we're forced to emit an
6926 // INSERT_SUBREG that we can't fold anywhere.
6927 //
6928 // We also allow types like i8 and i16 which are illegal scalar but legal
6929 // vector element types. After type-legalization the inserted value is
6930 // extended (i32) and it is safe to cast them to the vector type by ignoring
6931 // the upper bits of the lowest lane (e.g. v8i8, v4i16).
6932 if (!Op0.isUndef()) {
6933 DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
6934 Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
6935 ++i;
6936 }
6937 DEBUG(
6938 if (i < NumElts)
6939 dbgs() << "Creating nodes for the other vector elements:\n";
6940 );
6941 for (; i < NumElts; ++i) {
6942 SDValue V = Op.getOperand(i);
6943 if (V.isUndef())
6944 continue;
6945 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
6946 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6947 }
6948 return Vec;
6949 }
6950
6951 DEBUG(dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
6952 "better alternative\n");
6953 return SDValue();
6954}
6955
6956SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
6957 SelectionDAG &DAG) const {
6958 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
6959
6960 // Check for non-constant or out of range lane.
6961 EVT VT = Op.getOperand(0).getValueType();
6962 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
6963 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6964 return SDValue();
6965
6966
6967 // Insertion/extraction are legal for V128 types.
6968 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
6969 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
6970 VT == MVT::v8f16)
6971 return Op;
6972
6973 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
6974 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
6975 return SDValue();
6976
6977 // For V64 types, we perform insertion by expanding the value
6978 // to a V128 type and perform the insertion on that.
6979 SDLoc DL(Op);
6980 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
6981 EVT WideTy = WideVec.getValueType();
6982
6983 SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
6984 Op.getOperand(1), Op.getOperand(2));
6985 // Re-narrow the resultant vector.
6986 return NarrowVector(Node, DAG);
6987}
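The V64 path above, traced for a v4i16 insert with an in-range lane index:

//   insert_vector_elt v4i16 Vec, Elt, #Lane
//     WideVec = WidenVector(Vec)              ; v4i16 -> v8i16, upper half undef
//     Node    = insert_vector_elt v8i16 WideVec, Elt, #Lane
//     result  = NarrowVector(Node)            ; back to v4i16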
6988
6989SDValue
6990AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
6991 SelectionDAG &DAG) const {
6992 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
6993
6994 // Check for non-constant or out of range lane.
6995 EVT VT = Op.getOperand(0).getValueType();
6996 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6997 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6998 return SDValue();
6999
7000
7001 // Insertion/extraction are legal for V128 types.
7002 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
7003 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
7004 VT == MVT::v8f16)
7005 return Op;
7006
7007 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
7008 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
7009 return SDValue();
7010
7011 // For V64 types, we perform extraction by expanding the value
7012 // to a V128 type and perform the extraction on that.
7013 SDLoc DL(Op);
7014 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
7015 EVT WideTy = WideVec.getValueType();
7016
7017 EVT ExtrTy = WideTy.getVectorElementType();
7018 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
7019 ExtrTy = MVT::i32;
7020
7021 // For extractions, we just return the result directly.
7022 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
7023 Op.getOperand(1));
7024}
7025
7026SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
7027 SelectionDAG &DAG) const {
7028 EVT VT = Op.getOperand(0).getValueType();
7029 SDLoc dl(Op);
7030 // Just in case...
7031 if (!VT.isVector())
7032 return SDValue();
7033
7034 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
7035 if (!Cst)
7036 return SDValue();
7037 unsigned Val = Cst->getZExtValue();
7038
7039 unsigned Size = Op.getValueSizeInBits();
7040
7041 // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
7042 if (Val == 0)
7043 return Op;
7044
7045 // If this is extracting the upper 64-bits of a 128-bit vector, we match
7046 // that directly.
7047 if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
7048 return Op;
7049
7050 return SDValue();
7051}
7052
7053bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
7054 if (VT.getVectorNumElements() == 4 &&
7055 (VT.is128BitVector() || VT.is64BitVector())) {
7056 unsigned PFIndexes[4];
7057 for (unsigned i = 0; i != 4; ++i) {
7058 if (M[i] < 0)
7059 PFIndexes[i] = 8;
7060 else
7061 PFIndexes[i] = M[i];
7062 }
7063
7064 // Compute the index in the perfect shuffle table.
7065 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
7066 PFIndexes[2] * 9 + PFIndexes[3];
7067 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7068 unsigned Cost = (PFEntry >> 30);
7069
7070 if (Cost <= 4)
7071 return true;
7072 }
7073
7074 bool DummyBool;
7075 int DummyInt;
7076 unsigned DummyUnsigned;
7077
7078 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
7079 isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
7080 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
7081 // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
7082 isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
7083 isZIPMask(M, VT, DummyUnsigned) ||
7084 isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
7085 isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
7086 isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
7087 isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
7088 isConcatMask(M, VT, VT.getSizeInBits() == 128));
7089}
7090
7091/// getVShiftImm - Check if this is a valid build_vector for the immediate
7092/// operand of a vector shift operation, where all the elements of the
7093/// build_vector must have the same constant integer value.
7094static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
7095 // Ignore bit_converts.
7096 while (Op.getOpcode() == ISD::BITCAST)
7097 Op = Op.getOperand(0);
7098 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7099 APInt SplatBits, SplatUndef;
7100 unsigned SplatBitSize;
7101 bool HasAnyUndefs;
7102 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
7103 HasAnyUndefs, ElementBits) ||
7104 SplatBitSize > ElementBits)
7105 return false;
7106 Cnt = SplatBits.getSExtValue();
7107 return true;
7108}
7109
7110/// isVShiftLImm - Check if this is a valid build_vector for the immediate
7111/// operand of a vector shift left operation. That value must be in the range:
7112/// 0 <= Value < ElementBits for a left shift; or
7113/// 0 <= Value <= ElementBits for a long left shift.
7114static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
7115 assert(VT.isVector() && "vector shift count is not a vector type");
7116 int64_t ElementBits = VT.getScalarSizeInBits();
7117 if (!getVShiftImm(Op, ElementBits, Cnt))
7118 return false;
7119 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
7120}
7121
7122/// isVShiftRImm - Check if this is a valid build_vector for the immediate
7123/// operand of a vector shift right operation. The value must be in the range:
7124 /// 1 <= Value <= ElementBits for a right shift; or 1 <= Value <= ElementBits/2 for a narrowing right shift.
7125static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
7126 assert(VT.isVector() && "vector shift count is not a vector type");
7127 int64_t ElementBits = VT.getScalarSizeInBits();
7128 if (!getVShiftImm(Op, ElementBits, Cnt))
7129 return false;
7130 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
7131}
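The shift-immediate range checks above differ by one at both ends; spelled out for 32-bit elements, as the predicates are written:

//   isVShiftLImm, isLong = false :  0 <= Cnt && Cnt <  32
//   isVShiftLImm, isLong = true  :  0 <= Cnt && Cnt <= 32      (long left shift)
//   isVShiftRImm, isNarrow=false :  1 <= Cnt && Cnt <= 32
//   isVShiftRImm, isNarrow=true  :  1 <= Cnt && Cnt <= 16      (narrowing right shift)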
7132
7133SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
7134 SelectionDAG &DAG) const {
7135 EVT VT = Op.getValueType();
7136 SDLoc DL(Op);
7137 int64_t Cnt;
7138
7139 if (!Op.getOperand(1).getValueType().isVector())
7140 return Op;
7141 unsigned EltSize = VT.getScalarSizeInBits();
7142
7143 switch (Op.getOpcode()) {
7144 default:
7145 llvm_unreachable("unexpected shift opcode")::llvm::llvm_unreachable_internal("unexpected shift opcode", "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7145)
;
7146
7147 case ISD::SHL:
7148 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
7149 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
7150 DAG.getConstant(Cnt, DL, MVT::i32));
7151 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
7152 DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
7153 MVT::i32),
7154 Op.getOperand(0), Op.getOperand(1));
7155 case ISD::SRA:
7156 case ISD::SRL:
7157 // Right shift immediate
7158 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
7159 unsigned Opc =
7160 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
7161 return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
7162 DAG.getConstant(Cnt, DL, MVT::i32));
7163 }
7164
7165 // Right shift register. Note, there is not a shift right register
7166 // instruction, but the shift left register instruction takes a signed
7167 // value, where negative numbers specify a right shift.
7168 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
7169 : Intrinsic::aarch64_neon_ushl;
7170 // negate the shift amount
7171 SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
7172 SDValue NegShiftLeft =
7173 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
7174 DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
7175 NegShift);
7176 return NegShiftLeft;
7177 }
7178
7179 return SDValue();
7180}
7181
7182static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
7183 AArch64CC::CondCode CC, bool NoNans, EVT VT,
7184 const SDLoc &dl, SelectionDAG &DAG) {
7185 EVT SrcVT = LHS.getValueType();
7186 assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
7187 "function only supposed to emit natural comparisons");
7188
7189 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
7190 APInt CnstBits(VT.getSizeInBits(), 0);
7191 APInt UndefBits(VT.getSizeInBits(), 0);
7192 bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
7193 bool IsZero = IsCnst && (CnstBits == 0);
7194
7195 if (SrcVT.getVectorElementType().isFloatingPoint()) {
7196 switch (CC) {
7197 default:
7198 return SDValue();
7199 case AArch64CC::NE: {
7200 SDValue Fcmeq;
7201 if (IsZero)
7202 Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
7203 else
7204 Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
7205 return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
7206 }
7207 case AArch64CC::EQ:
7208 if (IsZero)
7209 return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
7210 return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
7211 case AArch64CC::GE:
7212 if (IsZero)
7213 return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
7214 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
7215 case AArch64CC::GT:
7216 if (IsZero)
7217 return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
7218 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
7219 case AArch64CC::LS:
7220 if (IsZero)
7221 return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
7222 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
7223 case AArch64CC::LT:
7224 if (!NoNans)
7225 return SDValue();
7226 // If we ignore NaNs then we can use the MI implementation.
7227 LLVM_FALLTHROUGH;
7228 case AArch64CC::MI:
7229 if (IsZero)
7230 return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
7231 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
7232 }
7233 }
7234
7235 switch (CC) {
7236 default:
7237 return SDValue();
7238 case AArch64CC::NE: {
7239 SDValue Cmeq;
7240 if (IsZero)
7241 Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
7242 else
7243 Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
7244 return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
7245 }
7246 case AArch64CC::EQ:
7247 if (IsZero)
7248 return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
7249 return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
7250 case AArch64CC::GE:
7251 if (IsZero)
7252 return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
7253 return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
7254 case AArch64CC::GT:
7255 if (IsZero)
7256 return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
7257 return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
7258 case AArch64CC::LE:
7259 if (IsZero)
7260 return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
7261 return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
7262 case AArch64CC::LS:
7263 return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
7264 case AArch64CC::LO:
7265 return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
7266 case AArch64CC::LT:
7267 if (IsZero)
7268 return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
7269 return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
7270 case AArch64CC::HI:
7271 return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
7272 case AArch64CC::HS:
7273 return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
7274 }
7275}
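Two representative rows of the integer mapping above (the floating-point half is analogous, using the FCM* opcodes): NE has no direct NEON compare, so it is built as a negated CMEQ, and unsigned less-than is handled by swapping operands into CMHI:

//   setcc ne  LHS, RHS           ->  NOT (CMEQ  LHS, RHS)
//   setcc ne  LHS, zero-splat    ->  NOT (CMEQz LHS)
//   setcc ult LHS, RHS   (LO)    ->  CMHI RHS, LHS             ; operands swapped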
7276
7277SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
7278 SelectionDAG &DAG) const {
7279 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7280 SDValue LHS = Op.getOperand(0);
7281 SDValue RHS = Op.getOperand(1);
7282 EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
7283 SDLoc dl(Op);
7284
7285 if (LHS.getValueType().getVectorElementType().isInteger()) {
7286 assert(LHS.getValueType() == RHS.getValueType());
7287 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7288 SDValue Cmp =
7289 EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
7290 return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
7291 }
7292
7293 if (LHS.getValueType().getVectorElementType() == MVT::f16)
7294 return SDValue();
7295
7296 assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
7297 LHS.getValueType().getVectorElementType() == MVT::f64);
7299 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7300 // clean. Some of them require two branches to implement.
7301 AArch64CC::CondCode CC1, CC2;
7302 bool ShouldInvert;
7303 changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
7304
7305 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
7306 SDValue Cmp =
7307 EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
7308 if (!Cmp.getNode())
7309 return SDValue();
7310
7311 if (CC2 != AArch64CC::AL) {
7312 SDValue Cmp2 =
7313 EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
7314 if (!Cmp2.getNode())
7315 return SDValue();
7316
7317 Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
7318 }
7319
7320 Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
7321
7322 if (ShouldInvert)
7323 return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
7324
7325 return Cmp;
7326}
7327
7328static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
7329 SelectionDAG &DAG) {
7330 SDValue VecOp = ScalarOp.getOperand(0);
7331 auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
7332 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
7333 DAG.getConstant(0, DL, MVT::i64));
7334}
7335
7336SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
7337 SelectionDAG &DAG) const {
7338 SDLoc dl(Op);
7339 switch (Op.getOpcode()) {
7340 case ISD::VECREDUCE_ADD:
7341 return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
7342 case ISD::VECREDUCE_SMAX:
7343 return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
7344 case ISD::VECREDUCE_SMIN:
7345 return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
7346 case ISD::VECREDUCE_UMAX:
7347 return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
7348 case ISD::VECREDUCE_UMIN:
7349 return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
7350 case ISD::VECREDUCE_FMAX: {
7351 assert(Op->getFlags().hasNoNaNs() && "fmax vector reduction needs NoNaN flag");
7352 return DAG.getNode(
7353 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
7354 DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
7355 Op.getOperand(0));
7356 }
7357 case ISD::VECREDUCE_FMIN: {
7358 assert(Op->getFlags().hasNoNaNs() && "fmin vector reduction needs NoNaN flag");
7359 return DAG.getNode(
7360 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
7361 DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
7362 Op.getOperand(0));
7363 }
7364 default:
7365 llvm_unreachable("Unhandled reduction")::llvm::llvm_unreachable_internal("Unhandled reduction", "/build/llvm-toolchain-snapshot-6.0~svn320417/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 7365)
;
7366 }
7367}
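As an example of the reduction lowering above, vecreduce_add on a v4i32 operand becomes a cross-vector UADDV followed by an extract of lane 0 in the scalar result type (getReductionSDNode supplies the extract):

//   vecreduce_add (v4i32 V)
//     Rdx = AArch64ISD::UADDV V                ; horizontal add across lanes
//     res = extract_vector_elt Rdx, #0         ; scalar result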
7368
7369/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
7370/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
7371/// specified in the intrinsic calls.
7372bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
7373 const CallInst &I,
7374 unsigned Intrinsic) const {
7375 auto &DL = I.getModule()->getDataLayout();
7376 switch (Intrinsic) {
7377 case Intrinsic::aarch64_neon_ld2:
7378 case Intrinsic::aarch64_neon_ld3:
7379 case Intrinsic::aarch64_neon_ld4:
7380 case Intrinsic::aarch64_neon_ld1x2:
7381 case Intrinsic::aarch64_neon_ld1x3:
7382 case Intrinsic::aarch64_neon_ld1x4:
7383 case Intrinsic::aarch64_neon_ld2lane:
7384 case Intrinsic::aarch64_neon_ld3lane:
7385 case Intrinsic::aarch64_neon_ld4lane:
7386 case Intrinsic::aarch64_neon_ld2r:
7387 case Intrinsic::aarch64_neon_ld3r:
7388 case Intrinsic::aarch64_neon_ld4r: {
7389 Info.opc = ISD::INTRINSIC_W_CHAIN;
7390 // Conservatively set memVT to the entire set of vectors loaded.
7391 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
7392 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
7393 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
7394 Info.offset = 0;
7395 Info.align = 0;
7396 Info.vol = false; // volatile loads with NEON intrinsics not supported
7397 Info.readMem = true;
7398 Info.writeMem = false;
7399 return true;
7400 }
7401 case Intrinsic::aarch64_neon_st2:
7402 case Intrinsic::aarch64_neon_st3:
7403 case Intrinsic::aarch64_neon_st4:
7404 case Intrinsic::aarch64_neon_st1x2:
7405 case Intrinsic::aarch64_neon_st1x3:
7406 case Intrinsic::aarch64_neon_st1x4:
7407 case Intrinsic::aarch64_neon_st2lane:
7408 case Intrinsic::aarch64_neon_st3lane:
7409 case Intrinsic::aarch64_neon_st4lane: {
7410 Info.opc = ISD::INTRINSIC_VOID;
7411 // Conservatively set memVT to the entire set of vectors stored.
7412 unsigned NumElts = 0;
7413 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
7414 Type *ArgTy = I.getArgOperand(ArgI)->getType();
7415 if (!ArgTy->isVectorTy())
7416 break;
7417 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
7418 }
7419 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
7420 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
7421 Info.offset = 0;
7422 Info.align = 0;
7423 Info.vol = false; // volatile stores with NEON intrinsics not supported
7424 Info.readMem = false;
7425 Info.writeMem = true;
7426 return true;
7427 }
7428 case Intrinsic::aarch64_ldaxr:
7429 case Intrinsic::aarch64_ldxr: {
7430 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
7431 Info.opc = ISD::INTRINSIC_W_CHAIN;
7432 Info.memVT = MVT::getVT(PtrTy->getElementType());
7433 Info.ptrVal = I.getArgOperand(0);
7434 Info.offset = 0;
7435 Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
7436 Info.vol = true;
7437 Info.readMem = true;
7438 Info.writeMem = false;
7439 return true;
7440 }
7441 case Intrinsic::aarch64_stlxr:
7442 case Intrinsic::aarch64_stxr: {
7443 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
7444 Info.opc = ISD::INTRINSIC_W_CHAIN;
7445 Info.memVT = MVT::getVT(PtrTy->getElementType());
7446 Info.ptrVal = I.getArgOperand(1);
7447 Info.offset = 0;
7448 Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
7449 Info.vol = true;
7450 Info.readMem = false;
7451 Info.writeMem = true;
7452 return true;
7453 }
7454 case Intrinsic::aarch64_ldaxp:
7455 case Intrinsic::aarch64_ldxp:
7456 Info.opc = ISD::INTRINSIC_W_CHAIN;
7457 Info.memVT = MVT::i128;
7458 Info.ptrVal = I.getArgOperand(0);
7459 Info.offset = 0;
7460 Info.align = 16;
7461 Info.vol = true;
7462 Info.readMem = true;
7463 Info.writeMem = false;
7464 return true;
7465 case Intrinsic::aarch64_stlxp:
7466 case Intrinsic::aarch64_stxp:
7467 Info.opc = ISD::INTRINSIC_W_CHAIN;
7468 Info.memVT = MVT::i128;
7469 Info.ptrVal = I.getArgOperand(2);
7470 Info.offset = 0;
7471 Info.align = 16;
7472 Info.vol = true;
7473 Info.readMem = false;
7474 Info.writeMem = true;
7475 return true;
7476 default:
7477 break;
7478 }
7479
7480 return false;
7481}
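// A minimal standalone sketch of the memVT sizing used in the ldN cases above
// (the helper name is hypothetical, assuming only the division shown in the
// code): the returned aggregate is described conservatively as a vector of
// i64 lanes. For an aarch64.neon.ld2 returning { <4 x i32>, <4 x i32> } the
// aggregate is 256 bits, so NumElts = 256 / 64 = 4 and memVT becomes v4i64.
static uint64_t conservativeNumI64Lanes(uint64_t AggregateSizeInBits) {
  return AggregateSizeInBits / 64; // one i64 lane per 64 bits loaded
}
// e.g. conservativeNumI64Lanes(256) == 4  (ld2 of two <4 x i32> vectors)
//      conservativeNumI64Lanes(384) == 6  (ld3 of three <4 x i32> vectors)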
7482
7483// Truncations from 64-bit GPR to 32-bit GPR are free.
7484bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
7485 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
7486 return false;
7487 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
7488 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
7489 return NumBits1 > NumBits2;
7490}
7491bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
7492 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
7493 return false;
7494 unsigned NumBits1 = VT1.getSizeInBits();
7495 unsigned NumBits2 = VT2.getSizeInBits();
7496 return NumBits1 > NumBits2;
7497}
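// A minimal sketch of the rule implemented above, assuming scalar integer
// values held in general-purpose registers (the helper name is hypothetical):
// any strictly narrowing integer truncation is free, because the low bits can
// be read directly as the W sub-register of an X register.
static bool scalarTruncIsFree(unsigned SrcBits, unsigned DstBits) {
  return SrcBits > DstBits; // e.g. (64, 32) -> true; (32, 64) -> false
}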
7498
7499/// Check if it is profitable to hoist instruction in then/else to if.
7500/// Not profitable if I and its user can form an FMA instruction
7501/// because we prefer FMSUB/FMADD.
7502bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
7503 if (I->getOpcode() != Instruction::FMul)
7504 return true;
7505
7506 if (!I->hasOneUse())
7507 return true;
7508
7509 Instruction *User = I->user_back();
7510
7511 if (User &&
7512 !(User->getOpcode() == Instruction::FSub ||
7513 User->getOpcode() == Instruction::FAdd))
7514 return true;
7515
7516 const TargetOptions &Options = getTargetMachine().Options;
7517 const DataLayout &DL = I->getModule()->getDataLayout();
7518 EVT VT = getValueType(DL, User->getOperand(0)->getType());
7519
7520 return !(isFMAFasterThanFMulAndFAdd(VT) &&
7521 isOperationLegalOrCustom(ISD::FMA, VT) &&
7522 (Options.AllowFPOpFusion == FPOpFusion::Fast ||
7523 Options.UnsafeFPMath));
7524}
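// A small source-level illustration of the hoisting heuristic above, under
// the assumption that fast FP fusion is enabled (the function below is
// hypothetical, not from this file): keeping the multiply next to its single
// fadd/fsub user lets each branch select one FMADD/FMSUB, so hoisting the
// fmul out of the then/else would only add an instruction.
static double selectFused(bool Pred, double A, double B, double C) {
  // Each arm may lower to a single fused multiply-add/subtract on AArch64
  // when FP contraction is allowed.
  return Pred ? (A * B + C) : (A * B - C);
}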
7525
7526// All 32-bit GPR operations implicitly zero the high-half of the corresponding
7527// 64-bit GPR.
7528bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
7529 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
7530 return false;
7531 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
7532 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
7533 return NumBits1 == 32 && NumBits2 == 64;
7534}
7535bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
7536 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
7537 return false;
7538 unsigned NumBits1 = VT1.getSizeInBits();
7539 unsigned NumBits2 = VT2.getSizeInBits();
7540 return NumBits1 == 32 && NumBits2 == 64;
7541}
7542
7543bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
7544 EVT VT1 = Val.getValueType();
7545 if (isZExtFree(VT1, VT2)) {
7546 return true;
7547 }
7548
7549 if (Val.getOpcode() != ISD::LOAD)
7550 return false;
7551
7552 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
7553 return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
7554 VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
7555 VT1.getSizeInBits() <= 32);
7556}
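// A minimal illustration of the zero-extension rules above, assuming the
// AArch64 register behavior quoted in the comment (writing a W register
// clears bits [63:32] of the corresponding X register); the helper is
// hypothetical. An i32 value, or an 8/16/32-bit load, is already
// zero-extended, so the explicit zext typically costs no extra instruction.
static uint64_t widenU32(uint32_t V) {
  return static_cast<uint64_t>(V); // usually just the 32-bit MOV/LDR itself
}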
7557
7558bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
7559 if (isa<FPExtInst>(Ext))
7560 return false;
7561
7562 // Vector types are not free.
7563 if (Ext->getType()->isVectorTy())
7564 return false;
7565
7566 for (const Use &U : Ext->uses()) {
7567 // The extension is free if we can fold it with a left shift in an
7568 // addressing mode or an arithmetic operation: add, sub, and cmp.
7569
7570 // Is there a shift?
7571 const Instruction *Instr = cast<Instruction>(U.getUser());
7572
7573 // Is this a constant shift?
7574 switch (Instr->getOpcode()) {
7575 case Instruction::Shl:
7576 if (!isa<ConstantInt>(Instr->getOperand(1)))
7577 return false;
7578 break;
7579 case Instruction::GetElementPtr: {
7580 gep_type_iterator GTI = gep_type_begin(Instr);
7581 auto &DL = Ext->getModule()->getDataLayout();
7582 std::advance(GTI, U.getOperandNo()-1);
7583 Type *IdxTy = GTI.getIndexedType();
7584 // This extension will end up with a shift because of the scaling factor.
7585 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
7586 // Get the shift amount based on the scaling factor:
7587 // log2(sizeof(IdxTy)) - log2(8).
7588 uint64_t ShiftAmt =
7589 countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
7590 // Is the constant foldable in the shift of the addressing mode?
7591 // I.e., shift amount is between 1 and 4 inclusive.
7592 if (ShiftAmt == 0 || ShiftAmt > 4)
7593 return false;
7594 break;
7595 }
7596 case Instruction::Trunc:
7597 // Check if this is a noop.
7598 // trunc(sext ty1 to ty2) to ty1.
7599 if (Instr->getType() == Ext->getOperand(0)->getType())
7600 continue;
7601 LLVM_FALLTHROUGH;
7602 default:
7603 return false;
7604 }
7605
7606 // At this point we can use the bfm family, so this extension is free
7607 // for that use.
7608 }
7609 return true;
7610}
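// A worked example of the GEP shift-amount computation above, mirrored in a
// hypothetical standalone helper (assumes a nonzero power-of-two store size):
// an i32 index type stores 32 bits, so countTrailingZeros(32) - 3 == 5 - 3 ==
// 2, a shift of 2 that the scaled addressing mode can fold. An i8 element
// gives shift 0 and an i256 element gives shift 5; both fall outside [1, 4]
// and are rejected.
static unsigned gepShiftAmount(unsigned long long StoreSizeInBits) {
  return __builtin_ctzll(StoreSizeInBits) - 3; // log2 of the size in bytes
}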
7611
7612bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
7613 unsigned &RequiredAligment) const {
7614 if (!LoadedType.isSimple() ||
7615 (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
7616 return false;
7617 // Cyclone supports unaligned accesses.
7618 RequiredAligment = 0;
7619 unsigned NumBits = LoadedType.getSizeInBits();
7620 return NumBits == 32 || NumBits == 64;
7621}
7622
7623/// A helper function for determining the number of interleaved accesses we
7624/// will generate when lowering accesses of the given type.
7625unsigned
7626AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
7627 const DataLayout &DL) const {
7628 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
7629}
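// The expression above is a ceiling division by 128: a vector type is covered
// by ceil(bits / 128) NEON-register-sized accesses. A hypothetical standalone
// mirror of the formula, with a few concrete values:
static unsigned numInterleavedAccessesFor(unsigned VecSizeInBits) {
  return (VecSizeInBits + 127) / 128;
}
// e.g. numInterleavedAccessesFor(64)  == 1  (<8 x i8>)
//      numInterleavedAccessesFor(128) == 1  (<4 x i32>)
//      numInterleavedAccessesFor(256) == 2  (<8 x i32>, split into two)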
7630
7631MachineMemOperand::Flags
7632AArch64TargetLowering::getMMOFlags(const Instruction &I) const {
7633 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
7634 I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
7635 return MOStridedAccess;
7636 return MachineMemOperand::MONone;
7637}
7638
7639bool AArch64TargetLowering::isLegalInterleavedAccessType(
7640 VectorType *VecTy, const DataLayout &DL) const {
7641
7642 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
7643 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
7644
7645 // Ensure the number of vector elements is greater than 1.
7646 if (VecTy->getNumElements() < 2)
7647 return false;
7648
7649 // Ensure the element type is legal.
7650 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
7651 return false;
7652
7653 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
7654 // 128 will be split into multiple interleaved accesses.
7655 return VecSize == 64 || VecSize % 128 == 0;
7656}
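// A hypothetical standalone mirror of the legality checks above, assuming
// only the element count, element size, and total size as inputs:
static bool interleavedAccessTypeIsLegal(unsigned NumElts, unsigned ElBits) {
  unsigned VecBits = NumElts * ElBits;
  if (NumElts < 2)
    return false; // need more than one element
  if (ElBits != 8 && ElBits != 16 && ElBits != 32 && ElBits != 64)
    return false; // element type must be legal
  return VecBits == 64 || VecBits % 128 == 0;
}
// e.g. legal: (8, 8) = 64 bits, (4, 32) = 128 bits, (8, 32) = 256 bits.
//      not legal: (1, 64) has too few elements, (3, 32) = 96 bits.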
7657
7658/// \brief Lower an interleaved load into a ldN intrinsic.
7659///
7660/// E.g. Lower an interleaved load (Factor = 2):
7661/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
7662/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
7663/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
7664///
7665/// Into:
7666/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
7667/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
7668/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
7669bool AArch64TargetLowering::lowerInterleavedLoad(
7670 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
7671 ArrayRef<unsigned> Indices, unsigned Factor) const {
7672 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
7673 "Invalid interleave factor");
7674 assert(!Shuffles.empty() && "Empty shufflevector input");
7675 assert(Shuffles.size() == Indices.size() &&
7676 "Unmatched number of shufflevectors and indices");
7677
7678 const DataLayout &DL = LI->getModule()->getDataLayout();
7679
7680 VectorType *VecTy = Shuffles[0]->getType();
7681
7682 // Skip if we do not have NEON and skip illegal vector types. We can
7683 // "legalize" wide vector types into multiple interleaved accesses as long as
7684 // the vector types are divisible by 128.
7685 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
7686 return false;
7687
7688 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
7689
7691 // A pointer vector cannot be the return type of the ldN intrinsics. Need to
7691 // load integer vectors first and then convert to pointer vectors.
7692 Type *EltTy = VecTy->getVectorElementType();
7693 if (EltTy->isPointerTy())
7694 VecTy =
7695 VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
7696
7697 IRBuilder<> Builder(LI);
7698
7699 // The base address of the load.
7700 Value *BaseAddr = LI->getPointerOperand();
7701
7702 if (NumLoads > 1) {
7703 // If we're going to generate more than one load, reset the sub-vector type
7704 // to something legal.
7705 VecTy = VectorType::get(VecTy->getVectorElementType(),
7706 VecTy->getVectorNumElements() / NumLoads);
7707
7708 // We will compute the pointer operand of each load from the original base
7709 // address using GEPs. Cast the base address to a pointer to the scalar
7710 // element type.
7711 BaseAddr = Builder.CreateBitCast(
7712 BaseAddr, VecTy->getVectorElementType()->getPointerTo(
7713 LI->getPointerAddressSpace()));
7714 }
7715
7716 Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
7717 Type *Tys[2] = {VecTy, PtrTy};
7718 static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
7719 Intrinsic::aarch64_neon_ld3,
7720 Intrinsic::aarch64_neon_ld4};
7721 Function *LdNFunc =
7722 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
7723
7724 // Holds sub-vectors extracted from the load intrinsic return values. The
7725 // sub-vectors are associated with the shufflevector instructions they will
7726 // replace.
7727 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
7728
7729 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
7730
7731 // If we're generating more than one load, compute the base address of
7732 // subsequent loads as an offset from the previous.
7733 if (LoadCount > 0)
7734 BaseAddr = Builder.CreateConstGEP1_32(
7735 BaseAddr, VecTy->getVectorNumElements() * Factor);
7736
7737 CallInst *LdN = Builder.CreateCall(
7738 LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
7739
7740 // Extract and store the sub-vectors returned by the load intrinsic.
7741 for (unsigned i = 0; i < Shuffles.size(); i++) {
7742 ShuffleVectorInst *SVI = Shuffles[i];
7743 unsigned Index = Indices[i];
7744
7745 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
7746
7747 // Convert the integer vector to pointer vector if the element is pointer.
7748 if (EltTy->isPointerTy())
7749 SubVec = Builder.CreateIntToPtr(
7750 SubVec, VectorType::get(SVI->getType()->getVectorElementType(),
7751 VecTy->getVectorNumElements()));
7752 SubVecs[SVI].push_back(SubVec);
7753 }
7754 }
7755
7756 // Replace uses of the shufflevector instructions with the sub-vectors
7757 // returned by the load intrinsic. If a shufflevector instruction is
7758 // associated with more than one sub-vector, those sub-vectors will be
7759 // concatenated into a single wide vector.
7760 for (ShuffleVectorInst *SVI : Shuffles) {
7761 auto &SubVec = SubVecs[SVI];
7762 auto *WideVec =
7763 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
7764 SVI->replaceAllUsesWith(WideVec);
7765 }
7766
7767 return true;
7768}
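// A sketch of the NumLoads > 1 path above, derived from the code and using
// the same Factor = 2 shape as the function comment (IR value names are
// illustrative): for
//   %wide.vec = load <16 x i32>, <16 x i32>* %ptr
// the sub-vector type is reset to <4 x i32>, so two ld2 calls are emitted,
// roughly
//   %base  = bitcast <16 x i32>* %ptr to i32*
//   %p0    = bitcast i32* %base to <4 x i32>*
//   %ld2.0 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* %p0)
//   %gep   = getelementptr i32, i32* %base, i32 8   ; 4 elements * factor 2
//   %p1    = bitcast i32* %gep to <4 x i32>*
//   %ld2.1 = call { <4 x i32>, <4 x i32> } @llvm.aarch64.neon.ld2.v4i32.p0v4i32(<4 x i32>* %p1)
// and each original shufflevector is replaced by the concatenation of its two
// <4 x i32> sub-vectors into an <8 x i32>.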
7769