Bug Summary

File: lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 8165, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
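For context, shifting an unsigned long long by 64 or more bits is undefined behaviour in C++: the shift count must be strictly less than the width of the type. Below is a minimal sketch of the pattern the analyzer objects to and a guarded alternative; the identifiers are illustrative only and are not taken from the file under analysis.

#include <cstdint>
#include <cassert>

// Build a mask of the low 'Shift' bits of a 64-bit value.
uint64_t maskLowBits(unsigned Shift) {
  assert(Shift <= 64 && "shift amount out of range");
  // (1ULL << 64) - 1 would be undefined, so handle the full-width case
  // explicitly before performing the shift.
  if (Shift == 64)
    return ~0ULL;               // all 64 bits set
  return (1ULL << Shift) - 1;   // well-defined: Shift < 64 here
}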

Annotated Source Code

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AArch64TargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64ISelLowering.h"
15#include "AArch64CallingConvention.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/MachineValueType.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/ValueTypes.h"
46#include "llvm/IR/Attributes.h"
47#include "llvm/IR/Constants.h"
48#include "llvm/IR/DataLayout.h"
49#include "llvm/IR/DebugLoc.h"
50#include "llvm/IR/DerivedTypes.h"
51#include "llvm/IR/Function.h"
52#include "llvm/IR/GetElementPtrTypeIterator.h"
53#include "llvm/IR/GlobalValue.h"
54#include "llvm/IR/IRBuilder.h"
55#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Instructions.h"
57#include "llvm/IR/Intrinsics.h"
58#include "llvm/IR/Module.h"
59#include "llvm/IR/OperandTraits.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/Value.h"
63#include "llvm/MC/MCRegisterInfo.h"
64#include "llvm/Support/Casting.h"
65#include "llvm/Support/CodeGen.h"
66#include "llvm/Support/CommandLine.h"
67#include "llvm/Support/Compiler.h"
68#include "llvm/Support/Debug.h"
69#include "llvm/Support/ErrorHandling.h"
70#include "llvm/Support/KnownBits.h"
71#include "llvm/Support/MathExtras.h"
72#include "llvm/Support/raw_ostream.h"
73#include "llvm/Target/TargetCallingConv.h"
74#include "llvm/Target/TargetInstrInfo.h"
75#include "llvm/Target/TargetMachine.h"
76#include "llvm/Target/TargetOptions.h"
77#include <algorithm>
78#include <bitset>
79#include <cassert>
80#include <cctype>
81#include <cstdint>
82#include <cstdlib>
83#include <iterator>
84#include <limits>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "aarch64-lower"
92
93STATISTIC(NumTailCalls, "Number of tail calls");
94STATISTIC(NumShiftInserts, "Number of vector shift inserts");
95STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96
97static cl::opt<bool>
98EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99 cl::desc("Allow AArch64 SLI/SRI formation"),
100 cl::init(false));
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116/// Value type used for condition codes.
117static const MVT MVT_CC = MVT::i32;
118
119AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
120 const AArch64Subtarget &STI)
121 : TargetLowering(TM), Subtarget(&STI) {
122 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123 // we have to make something up. Arbitrarily, choose ZeroOrOne.
124 setBooleanContents(ZeroOrOneBooleanContent);
125 // When comparing vectors the result sets the different elements in the
126 // vector to all-one or all-zero.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // Set up the register classes.
130 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132
133 if (Subtarget->hasFPARMv8()) {
134 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138 }
139
140 if (Subtarget->hasNEON()) {
141 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143 // Someone set us up the NEON.
144 addDRTypeForNEON(MVT::v2f32);
145 addDRTypeForNEON(MVT::v8i8);
146 addDRTypeForNEON(MVT::v4i16);
147 addDRTypeForNEON(MVT::v2i32);
148 addDRTypeForNEON(MVT::v1i64);
149 addDRTypeForNEON(MVT::v1f64);
150 addDRTypeForNEON(MVT::v4f16);
151
152 addQRTypeForNEON(MVT::v4f32);
153 addQRTypeForNEON(MVT::v2f64);
154 addQRTypeForNEON(MVT::v16i8);
155 addQRTypeForNEON(MVT::v8i16);
156 addQRTypeForNEON(MVT::v4i32);
157 addQRTypeForNEON(MVT::v2i64);
158 addQRTypeForNEON(MVT::v8f16);
159 }
160
161 // Compute derived properties from the register classes
162 computeRegisterProperties(Subtarget->getRegisterInfo());
163
164 // Provide all sorts of operation actions
165 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
166 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
167 setOperationAction(ISD::SETCC, MVT::i32, Custom);
168 setOperationAction(ISD::SETCC, MVT::i64, Custom);
169 setOperationAction(ISD::SETCC, MVT::f16, Custom);
170 setOperationAction(ISD::SETCC, MVT::f32, Custom);
171 setOperationAction(ISD::SETCC, MVT::f64, Custom);
172 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
173 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
174 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
175 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
176 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
177 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
178 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
179 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
180 setOperationAction(ISD::SELECT, MVT::i32, Custom);
181 setOperationAction(ISD::SELECT, MVT::i64, Custom);
182 setOperationAction(ISD::SELECT, MVT::f16, Custom);
183 setOperationAction(ISD::SELECT, MVT::f32, Custom);
184 setOperationAction(ISD::SELECT, MVT::f64, Custom);
185 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
186 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
188 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
189 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
190 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
191 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
192
193 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
194 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
195 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
196
197 setOperationAction(ISD::FREM, MVT::f32, Expand);
198 setOperationAction(ISD::FREM, MVT::f64, Expand);
199 setOperationAction(ISD::FREM, MVT::f80, Expand);
200
201 // Custom lowering hooks are needed for XOR
202 // to fold it into CSINC/CSINV.
203 setOperationAction(ISD::XOR, MVT::i32, Custom);
204 setOperationAction(ISD::XOR, MVT::i64, Custom);
205
206 // Virtually no operation on f128 is legal, but LLVM can't expand them when
207 // there's a valid register class, so we need custom operations in most cases.
208 setOperationAction(ISD::FABS, MVT::f128, Expand);
209 setOperationAction(ISD::FADD, MVT::f128, Custom);
210 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
211 setOperationAction(ISD::FCOS, MVT::f128, Expand);
212 setOperationAction(ISD::FDIV, MVT::f128, Custom);
213 setOperationAction(ISD::FMA, MVT::f128, Expand);
214 setOperationAction(ISD::FMUL, MVT::f128, Custom);
215 setOperationAction(ISD::FNEG, MVT::f128, Expand);
216 setOperationAction(ISD::FPOW, MVT::f128, Expand);
217 setOperationAction(ISD::FREM, MVT::f128, Expand);
218 setOperationAction(ISD::FRINT, MVT::f128, Expand);
219 setOperationAction(ISD::FSIN, MVT::f128, Expand);
220 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
221 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
222 setOperationAction(ISD::FSUB, MVT::f128, Custom);
223 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
224 setOperationAction(ISD::SETCC, MVT::f128, Custom);
225 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
226 setOperationAction(ISD::SELECT, MVT::f128, Custom);
227 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
228 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
229
230 // Lowering for many of the conversions is actually specified by the non-f128
231 // type. The LowerXXX function will be trivial when f128 isn't involved.
232 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
233 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
234 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
235 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
236 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
237 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
238 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
239 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
241 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
242 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
243 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
244 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
245 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
246
247 // Variable arguments.
248 setOperationAction(ISD::VASTART, MVT::Other, Custom);
249 setOperationAction(ISD::VAARG, MVT::Other, Custom);
250 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
251 setOperationAction(ISD::VAEND, MVT::Other, Expand);
252
253 // Variable-sized objects.
254 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
255 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
256 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
257
258 // Constant pool entries
259 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
260
261 // BlockAddress
262 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
263
264 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
265 setOperationAction(ISD::ADDC, MVT::i32, Custom);
266 setOperationAction(ISD::ADDE, MVT::i32, Custom);
267 setOperationAction(ISD::SUBC, MVT::i32, Custom);
268 setOperationAction(ISD::SUBE, MVT::i32, Custom);
269 setOperationAction(ISD::ADDC, MVT::i64, Custom);
270 setOperationAction(ISD::ADDE, MVT::i64, Custom);
271 setOperationAction(ISD::SUBC, MVT::i64, Custom);
272 setOperationAction(ISD::SUBE, MVT::i64, Custom);
273
274 // AArch64 lacks both left-rotate and popcount instructions.
275 setOperationAction(ISD::ROTL, MVT::i32, Expand);
276 setOperationAction(ISD::ROTL, MVT::i64, Expand);
277 for (MVT VT : MVT::vector_valuetypes()) {
278 setOperationAction(ISD::ROTL, VT, Expand);
279 setOperationAction(ISD::ROTR, VT, Expand);
280 }
281
282 // AArch64 doesn't have {U|S}MUL_LOHI.
283 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
284 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
285
286 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
287 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
288
289 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
290 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
291 for (MVT VT : MVT::vector_valuetypes()) {
292 setOperationAction(ISD::SDIVREM, VT, Expand);
293 setOperationAction(ISD::UDIVREM, VT, Expand);
294 }
295 setOperationAction(ISD::SREM, MVT::i32, Expand);
296 setOperationAction(ISD::SREM, MVT::i64, Expand);
297 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
298 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
299 setOperationAction(ISD::UREM, MVT::i32, Expand);
300 setOperationAction(ISD::UREM, MVT::i64, Expand);
301
302 // Custom lower Add/Sub/Mul with overflow.
303 setOperationAction(ISD::SADDO, MVT::i32, Custom);
304 setOperationAction(ISD::SADDO, MVT::i64, Custom);
305 setOperationAction(ISD::UADDO, MVT::i32, Custom);
306 setOperationAction(ISD::UADDO, MVT::i64, Custom);
307 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
308 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
309 setOperationAction(ISD::USUBO, MVT::i32, Custom);
310 setOperationAction(ISD::USUBO, MVT::i64, Custom);
311 setOperationAction(ISD::SMULO, MVT::i32, Custom);
312 setOperationAction(ISD::SMULO, MVT::i64, Custom);
313 setOperationAction(ISD::UMULO, MVT::i32, Custom);
314 setOperationAction(ISD::UMULO, MVT::i64, Custom);
315
316 setOperationAction(ISD::FSIN, MVT::f32, Expand);
317 setOperationAction(ISD::FSIN, MVT::f64, Expand);
318 setOperationAction(ISD::FCOS, MVT::f32, Expand);
319 setOperationAction(ISD::FCOS, MVT::f64, Expand);
320 setOperationAction(ISD::FPOW, MVT::f32, Expand);
321 setOperationAction(ISD::FPOW, MVT::f64, Expand);
322 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
323 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
324 if (Subtarget->hasFullFP16())
325 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
326 else
327 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
328
329 setOperationAction(ISD::FREM, MVT::f16, Promote);
330 setOperationAction(ISD::FREM, MVT::v4f16, Promote);
331 setOperationAction(ISD::FREM, MVT::v8f16, Promote);
332 setOperationAction(ISD::FPOW, MVT::f16, Promote);
333 setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
334 setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
335 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
336 setOperationAction(ISD::FCOS, MVT::f16, Promote);
337 setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
338 setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
339 setOperationAction(ISD::FSIN, MVT::f16, Promote);
340 setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
341 setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
342 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
343 setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
344 setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
345 setOperationAction(ISD::FEXP, MVT::f16, Promote);
346 setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
347 setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
348 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
349 setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
350 setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
351 setOperationAction(ISD::FLOG, MVT::f16, Promote);
352 setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
353 setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
354 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
355 setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
356 setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
357 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
358 setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
359 setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
360
361 if (!Subtarget->hasFullFP16()) {
362 setOperationAction(ISD::SELECT, MVT::f16, Promote);
363 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
364 setOperationAction(ISD::SETCC, MVT::f16, Promote);
365 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
366 setOperationAction(ISD::FADD, MVT::f16, Promote);
367 setOperationAction(ISD::FSUB, MVT::f16, Promote);
368 setOperationAction(ISD::FMUL, MVT::f16, Promote);
369 setOperationAction(ISD::FDIV, MVT::f16, Promote);
370 setOperationAction(ISD::FMA, MVT::f16, Promote);
371 setOperationAction(ISD::FNEG, MVT::f16, Promote);
372 setOperationAction(ISD::FABS, MVT::f16, Promote);
373 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
374 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
375 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
376 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
377 setOperationAction(ISD::FRINT, MVT::f16, Promote);
378 setOperationAction(ISD::FROUND, MVT::f16, Promote);
379 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
380 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
381 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
382 setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
383 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
384
385 // promote v4f16 to v4f32 when that is known to be safe.
386 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
387 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
388 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
389 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
390 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
391 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
392 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
393 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
394 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
395 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
396 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
397 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
398
399 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
400 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
401 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
402 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
403 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
404 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
405 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
406 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
407 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
408 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
409 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
410 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
411 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
412 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
413 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
414
415 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
416 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
417 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
418 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
419 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
420 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
421 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
422 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
423 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
424 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
425 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
426 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
427 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
428 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
429 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
430 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
431 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
432 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
433 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
434 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
435 }
436
437 // AArch64 has implementations of a lot of rounding-like FP operations.
438 for (MVT Ty : {MVT::f32, MVT::f64}) {
439 setOperationAction(ISD::FFLOOR, Ty, Legal);
440 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
441 setOperationAction(ISD::FCEIL, Ty, Legal);
442 setOperationAction(ISD::FRINT, Ty, Legal);
443 setOperationAction(ISD::FTRUNC, Ty, Legal);
444 setOperationAction(ISD::FROUND, Ty, Legal);
445 setOperationAction(ISD::FMINNUM, Ty, Legal);
446 setOperationAction(ISD::FMAXNUM, Ty, Legal);
447 setOperationAction(ISD::FMINNAN, Ty, Legal);
448 setOperationAction(ISD::FMAXNAN, Ty, Legal);
449 }
450
451 if (Subtarget->hasFullFP16()) {
452 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
453 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
454 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
455 setOperationAction(ISD::FRINT, MVT::f16, Legal);
456 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
457 setOperationAction(ISD::FROUND, MVT::f16, Legal);
458 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
459 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
460 setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
461 setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
462 }
463
464 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
465
466 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
467
468 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
469 // This requires the Performance Monitors extension.
470 if (Subtarget->hasPerfMon())
471 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
472
473 if (Subtarget->isTargetMachO()) {
474 // For iOS, we don't want the normal expansion of a libcall to
475 // sincos. We want to issue a libcall to __sincos_stret to avoid memory
476 // traffic.
477 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
478 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
479 } else {
480 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
481 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
482 }
483
484 // Make floating-point constants legal for the large code model, so they don't
485 // become loads from the constant pool.
486 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
487 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
488 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
489 }
490
491 // AArch64 does not have floating-point extending loads, i1 sign-extending
492 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
493 for (MVT VT : MVT::fp_valuetypes()) {
494 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
495 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
496 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
497 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
498 }
499 for (MVT VT : MVT::integer_valuetypes())
500 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
501
502 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
503 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
504 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
505 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
506 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
507 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
508 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
509
510 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
511 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
512
513 // Indexed loads and stores are supported.
514 for (unsigned im = (unsigned)ISD::PRE_INC;
515 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
516 setIndexedLoadAction(im, MVT::i8, Legal);
517 setIndexedLoadAction(im, MVT::i16, Legal);
518 setIndexedLoadAction(im, MVT::i32, Legal);
519 setIndexedLoadAction(im, MVT::i64, Legal);
520 setIndexedLoadAction(im, MVT::f64, Legal);
521 setIndexedLoadAction(im, MVT::f32, Legal);
522 setIndexedLoadAction(im, MVT::f16, Legal);
523 setIndexedStoreAction(im, MVT::i8, Legal);
524 setIndexedStoreAction(im, MVT::i16, Legal);
525 setIndexedStoreAction(im, MVT::i32, Legal);
526 setIndexedStoreAction(im, MVT::i64, Legal);
527 setIndexedStoreAction(im, MVT::f64, Legal);
528 setIndexedStoreAction(im, MVT::f32, Legal);
529 setIndexedStoreAction(im, MVT::f16, Legal);
530 }
531
532 // Trap.
533 setOperationAction(ISD::TRAP, MVT::Other, Legal);
534
535 // We combine OR nodes for bitfield operations.
536 setTargetDAGCombine(ISD::OR);
537
538 // Vector add and sub nodes may conceal a high-half opportunity.
539 // Also, try to fold ADD into CSINC/CSINV.
540 setTargetDAGCombine(ISD::ADD);
541 setTargetDAGCombine(ISD::SUB);
542 setTargetDAGCombine(ISD::SRL);
543 setTargetDAGCombine(ISD::XOR);
544 setTargetDAGCombine(ISD::SINT_TO_FP);
545 setTargetDAGCombine(ISD::UINT_TO_FP);
546
547 setTargetDAGCombine(ISD::FP_TO_SINT);
548 setTargetDAGCombine(ISD::FP_TO_UINT);
549 setTargetDAGCombine(ISD::FDIV);
550
551 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
552
553 setTargetDAGCombine(ISD::ANY_EXTEND);
554 setTargetDAGCombine(ISD::ZERO_EXTEND);
555 setTargetDAGCombine(ISD::SIGN_EXTEND);
556 setTargetDAGCombine(ISD::BITCAST);
557 setTargetDAGCombine(ISD::CONCAT_VECTORS);
558 setTargetDAGCombine(ISD::STORE);
559 if (Subtarget->supportsAddressTopByteIgnored())
560 setTargetDAGCombine(ISD::LOAD);
561
562 setTargetDAGCombine(ISD::MUL);
563
564 setTargetDAGCombine(ISD::SELECT);
565 setTargetDAGCombine(ISD::VSELECT);
566
567 setTargetDAGCombine(ISD::INTRINSIC_VOID);
568 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
569 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
570
571 MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
572 MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
573 MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
574
575 setStackPointerRegisterToSaveRestore(AArch64::SP);
576
577 setSchedulingPreference(Sched::Hybrid);
578
579 EnableExtLdPromotion = true;
580
581 // Set required alignment.
582 setMinFunctionAlignment(2);
583 // Set preferred alignments.
584 setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
585 setPrefLoopAlignment(STI.getPrefLoopAlignment());
586
587 // Only change the limit for entries in a jump table if specified by
588 // the subtarget, but not at the command line.
589 unsigned MaxJT = STI.getMaximumJumpTableSize();
590 if (MaxJT && getMaximumJumpTableSize() == 0)
591 setMaximumJumpTableSize(MaxJT);
592
593 setHasExtractBitsInsn(true);
594
595 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
596
597 if (Subtarget->hasNEON()) {
598 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
599 // silliness like this:
600 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
601 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
602 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
603 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
604 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
605 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
606 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
607 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
608 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
609 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
610 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
611 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
612 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
613 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
614 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
615 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
616 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
617 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
618 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
619 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
620 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
621 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
622 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
623 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
624 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
625
626 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
627 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
628 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
629 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
630 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
631
632 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
633
634 // AArch64 doesn't have direct vector ->f32 conversion instructions for
635 // elements smaller than i32, so promote the input to i32 first.
636 setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Promote);
637 setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Promote);
638 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Promote);
639 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Promote);
640 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
641 // -> v8f16 conversions.
642 setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Promote);
643 setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Promote);
644 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Promote);
645 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Promote);
646 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
647 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
648 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
649 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
650 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
651 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
652 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
653 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
654 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
655
656 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
657 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
658
659 setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
660 setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
661 setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
662 setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
663 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
664 setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
665 setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
666 setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
667
668 // AArch64 doesn't have MUL.2d:
669 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
670 // Custom handling for some quad-vector types to detect MULL.
671 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
672 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
673 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
674
675 // Vector reductions
676 for (MVT VT : MVT::integer_valuetypes()) {
677 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
678 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
679 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
680 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
681 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
682 }
683 for (MVT VT : MVT::fp_valuetypes()) {
684 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
685 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
686 }
687
688 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
689 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
690 // Likewise, narrowing and extending vector loads/stores aren't handled
691 // directly.
692 for (MVT VT : MVT::vector_valuetypes()) {
693 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
694
695 setOperationAction(ISD::MULHS, VT, Expand);
696 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
697 setOperationAction(ISD::MULHU, VT, Expand);
698 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
699
700 setOperationAction(ISD::BSWAP, VT, Expand);
701
702 for (MVT InnerVT : MVT::vector_valuetypes()) {
703 setTruncStoreAction(VT, InnerVT, Expand);
704 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
705 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
706 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
707 }
708 }
709
710 // AArch64 has implementations of a lot of rounding-like FP operations.
711 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
712 setOperationAction(ISD::FFLOOR, Ty, Legal);
713 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
714 setOperationAction(ISD::FCEIL, Ty, Legal);
715 setOperationAction(ISD::FRINT, Ty, Legal);
716 setOperationAction(ISD::FTRUNC, Ty, Legal);
717 setOperationAction(ISD::FROUND, Ty, Legal);
718 }
719 }
720
721 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
722}
723
724void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
725 if (VT == MVT::v2f32 || VT == MVT::v4f16) {
726 setOperationAction(ISD::LOAD, VT, Promote);
727 AddPromotedToType(ISD::LOAD, VT, MVT::v2i32);
728
729 setOperationAction(ISD::STORE, VT, Promote);
730 AddPromotedToType(ISD::STORE, VT, MVT::v2i32);
731 } else if (VT == MVT::v2f64 || VT == MVT::v4f32 || VT == MVT::v8f16) {
732 setOperationAction(ISD::LOAD, VT, Promote);
733 AddPromotedToType(ISD::LOAD, VT, MVT::v2i64);
734
735 setOperationAction(ISD::STORE, VT, Promote);
736 AddPromotedToType(ISD::STORE, VT, MVT::v2i64);
737 }
738
739 // Mark vector float intrinsics as expand.
740 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
741 setOperationAction(ISD::FSIN, VT, Expand);
742 setOperationAction(ISD::FCOS, VT, Expand);
743 setOperationAction(ISD::FPOW, VT, Expand);
744 setOperationAction(ISD::FLOG, VT, Expand);
745 setOperationAction(ISD::FLOG2, VT, Expand);
746 setOperationAction(ISD::FLOG10, VT, Expand);
747 setOperationAction(ISD::FEXP, VT, Expand);
748 setOperationAction(ISD::FEXP2, VT, Expand);
749
750 // But we do support custom-lowering for FCOPYSIGN.
751 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
752 }
753
754 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
755 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
756 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
757 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
758 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
759 setOperationAction(ISD::SRA, VT, Custom);
760 setOperationAction(ISD::SRL, VT, Custom);
761 setOperationAction(ISD::SHL, VT, Custom);
762 setOperationAction(ISD::AND, VT, Custom);
763 setOperationAction(ISD::OR, VT, Custom);
764 setOperationAction(ISD::SETCC, VT, Custom);
765 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
766
767 setOperationAction(ISD::SELECT, VT, Expand);
768 setOperationAction(ISD::SELECT_CC, VT, Expand);
769 setOperationAction(ISD::VSELECT, VT, Expand);
770 for (MVT InnerVT : MVT::all_valuetypes())
771 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
772
773 // CNT supports only B element sizes.
774 if (VT != MVT::v8i8 && VT != MVT::v16i8)
775 setOperationAction(ISD::CTPOP, VT, Expand);
776
777 setOperationAction(ISD::UDIV, VT, Expand);
778 setOperationAction(ISD::SDIV, VT, Expand);
779 setOperationAction(ISD::UREM, VT, Expand);
780 setOperationAction(ISD::SREM, VT, Expand);
781 setOperationAction(ISD::FREM, VT, Expand);
782
783 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
784 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
785
786 if (!VT.isFloatingPoint())
787 setOperationAction(ISD::ABS, VT, Legal);
788
789 // [SU][MIN|MAX] are available for all NEON types apart from i64.
790 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
791 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
792 setOperationAction(Opcode, VT, Legal);
793
794 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
795 if (VT.isFloatingPoint() &&
796 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
797 for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
798 ISD::FMINNUM, ISD::FMAXNUM})
799 setOperationAction(Opcode, VT, Legal);
800
801 if (Subtarget->isLittleEndian()) {
802 for (unsigned im = (unsigned)ISD::PRE_INC;
803 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
804 setIndexedLoadAction(im, VT, Legal);
805 setIndexedStoreAction(im, VT, Legal);
806 }
807 }
808}
809
810void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
811 addRegisterClass(VT, &AArch64::FPR64RegClass);
812 addTypeForNEON(VT, MVT::v2i32);
813}
814
815void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
816 addRegisterClass(VT, &AArch64::FPR128RegClass);
817 addTypeForNEON(VT, MVT::v4i32);
818}
819
820EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
821 EVT VT) const {
822 if (!VT.isVector())
823 return MVT::i32;
824 return VT.changeVectorElementTypeToInteger();
825}
826
827static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
828 const APInt &Demanded,
829 TargetLowering::TargetLoweringOpt &TLO,
830 unsigned NewOpc) {
831 uint64_t OldImm = Imm, NewImm, Enc;
832 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
833
834 // Return if the immediate is already all zeros, all ones, a bimm32 or a
835 // bimm64.
836 if (Imm == 0 || Imm == Mask ||
837 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
838 return false;
839
840 unsigned EltSize = Size;
841 uint64_t DemandedBits = Demanded.getZExtValue();
842
843 // Clear bits that are not demanded.
844 Imm &= DemandedBits;
845
846 while (true) {
847 // The goal here is to set the non-demanded bits in a way that minimizes
848 // the number of switching between 0 and 1. In order to achieve this goal,
849 // we set the non-demanded bits to the value of the preceding demanded bits.
850 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
851 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
852 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
853 // The final result is 0b11000011.
854 uint64_t NonDemandedBits = ~DemandedBits;
855 uint64_t InvertedImm = ~Imm & DemandedBits;
856 uint64_t RotatedImm =
857 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
858 NonDemandedBits;
859 uint64_t Sum = RotatedImm + NonDemandedBits;
860 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
861 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
862 NewImm = (Imm | Ones) & Mask;
863
864 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
865 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
866 // we halve the element size and continue the search.
867 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
868 break;
869
870 // We cannot shrink the element size any further if it is 2-bits.
871 if (EltSize == 2)
872 return false;
873
874 EltSize /= 2;
875 Mask >>= EltSize;
876 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
877
878 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
879 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
880 return false;
881
882 // Merge the upper and lower halves of Imm and DemandedBits.
883 Imm |= Hi;
884 DemandedBits |= DemandedBitsHi;
885 }
886
887 ++NumOptimizedImms;
888
889 // Replicate the element across the register width.
890 while (EltSize < Size) {
891 NewImm |= NewImm << EltSize;
892 EltSize *= 2;
893 }
894
895 (void)OldImm;
896 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
897 "demanded bits should never be altered");
898 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
900 // Create the new constant immediate node.
901 EVT VT = Op.getValueType();
902 SDLoc DL(Op);
903 SDValue New;
904
905 // If the new constant immediate is all-zeros or all-ones, let the target
906 // independent DAG combine optimize this node.
907 if (NewImm == 0 || NewImm == OrigMask) {
908 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
909 TLO.DAG.getConstant(NewImm, DL, VT));
910 // Otherwise, create a machine node so that target independent DAG combine
911 // doesn't undo this optimization.
912 } else {
913 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
914 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
915 New = SDValue(
916 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
917 }
918
919 return TLO.CombineTo(Op, New);
920}
921
922bool AArch64TargetLowering::targetShrinkDemandedConstant(
923 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
924 // Delay this optimization to as late as possible.
925 if (!TLO.LegalOps)
926 return false;
927
928 if (!EnableOptimizeLogicalImm)
929 return false;
930
931 EVT VT = Op.getValueType();
932 if (VT.isVector())
933 return false;
934
935 unsigned Size = VT.getSizeInBits();
936 assert((Size == 32 || Size == 64) &&
937 "i32 or i64 is expected after legalization.");
938
939 // Exit early if we demand all bits.
940 if (Demanded.countPopulation() == Size)
941 return false;
942
943 unsigned NewOpc;
944 switch (Op.getOpcode()) {
945 default:
946 return false;
947 case ISD::AND:
948 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
949 break;
950 case ISD::OR:
951 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
952 break;
953 case ISD::XOR:
954 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
955 break;
956 }
957 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
958 if (!C)
959 return false;
960 uint64_t Imm = C->getZExtValue();
961 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
962}
963
964/// computeKnownBitsForTargetNode - Determine which of the bits specified in
965 /// Mask are known to be either zero or one and return them in Known.
966void AArch64TargetLowering::computeKnownBitsForTargetNode(
967 const SDValue Op, KnownBits &Known,
968 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
969 switch (Op.getOpcode()) {
970 default:
971 break;
972 case AArch64ISD::CSEL: {
973 KnownBits Known2;
974 DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
975 DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
976 Known.Zero &= Known2.Zero;
977 Known.One &= Known2.One;
978 break;
979 }
980 case ISD::INTRINSIC_W_CHAIN: {
981 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
982 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
983 switch (IntID) {
984 default: return;
985 case Intrinsic::aarch64_ldaxr:
986 case Intrinsic::aarch64_ldxr: {
987 unsigned BitWidth = Known.getBitWidth();
988 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
989 unsigned MemBits = VT.getScalarSizeInBits();
990 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
991 return;
992 }
993 }
994 break;
995 }
996 case ISD::INTRINSIC_WO_CHAIN:
997 case ISD::INTRINSIC_VOID: {
998 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
999 switch (IntNo) {
1000 default:
1001 break;
1002 case Intrinsic::aarch64_neon_umaxv:
1003 case Intrinsic::aarch64_neon_uminv: {
1004 // Figure out the datatype of the vector operand. The UMINV instruction
1005 // will zero extend the result, so we can mark as known zero all the
1006 // bits larger than the element datatype. 32-bit or larger doesn't need
1007 // this as those are legal types and will be handled by isel directly.
1008 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1009 unsigned BitWidth = Known.getBitWidth();
1010 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1011 assert(BitWidth >= 8 && "Unexpected width!");
1012 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1013 Known.Zero |= Mask;
1014 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1015 assert(BitWidth >= 16 && "Unexpected width!");
1016 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1017 Known.Zero |= Mask;
1018 }
1019 break;
1020 } break;
1021 }
1022 }
1023 }
1024}
1025
1026MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1027 EVT) const {
1028 return MVT::i64;
1029}
1030
1031bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1032 unsigned AddrSpace,
1033 unsigned Align,
1034 bool *Fast) const {
1035 if (Subtarget->requiresStrictAlign())
1036 return false;
1037
1038 if (Fast) {
1039 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1040 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1041 // See comments in performSTORECombine() for more details about
1042 // these conditions.
1043
1044 // Code that uses clang vector extensions can mark that it
1045 // wants unaligned accesses to be treated as fast by
1046 // underspecifying alignment to be 1 or 2.
1047 Align <= 2 ||
1048
1049 // Disregard v2i64. Memcpy lowering produces those and splitting
1050 // them regresses performance on micro-benchmarks and olden/bh.
1051 VT == MVT::v2i64;
1052 }
1053 return true;
1054}
1055
1056FastISel *
1057AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1058 const TargetLibraryInfo *libInfo) const {
1059 return AArch64::createFastISel(funcInfo, libInfo);
1060}
1061
1062const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1063 switch ((AArch64ISD::NodeType)Opcode) {
1064 case AArch64ISD::FIRST_NUMBER: break;
1065 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1066 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1067 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1068 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1069 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1070 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1071 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1072 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1073 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1074 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1075 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1076 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1077 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1078 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1079 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1080 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1081 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1082 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1083 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1084 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1085 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1086 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1087 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1088 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1089 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1090 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1091 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1092 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1093 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1094 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1095 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1096 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1097 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1098 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1099 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1100 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1101 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1102 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1103 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1104 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1105 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1106 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1107 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1108 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1109 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1110 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1111 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1112 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1113 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1114 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1115 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1116 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1117 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1118 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1119 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1120 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1121 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1122 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1123 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1124 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1125 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1126 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1127 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1128 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1129 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1130 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1131 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1132 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1133 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1134 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1135 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1136 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1137 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1138 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1139 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1140 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1141 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1142 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1143 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1144 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1145 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1146 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1147 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1148 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1149 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1150 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1151 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1152 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1153 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1154 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1155 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1156 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1157 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1158 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1159 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1160 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1161 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1162 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1163 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1164 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1165 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1166 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1167 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1168 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1169 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1170 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1171 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1172 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1173 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1174 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1175 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1176 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1177 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1178 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1179 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1180 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1181 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1182 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1183 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1184 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1185 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1186 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1187 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1188 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1189 }
1190 return nullptr;
1191}
1192
1193MachineBasicBlock *
1194AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1195 MachineBasicBlock *MBB) const {
1196 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1197 // phi node:
1198
1199 // OrigBB:
1200 // [... previous instrs leading to comparison ...]
1201 // b.ne TrueBB
1202 // b EndBB
1203 // TrueBB:
1204 // ; Fallthrough
1205 // EndBB:
1206 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1207
1208 MachineFunction *MF = MBB->getParent();
1209 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1210 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1211 DebugLoc DL = MI.getDebugLoc();
1212 MachineFunction::iterator It = ++MBB->getIterator();
1213
1214 unsigned DestReg = MI.getOperand(0).getReg();
1215 unsigned IfTrueReg = MI.getOperand(1).getReg();
1216 unsigned IfFalseReg = MI.getOperand(2).getReg();
1217 unsigned CondCode = MI.getOperand(3).getImm();
1218 bool NZCVKilled = MI.getOperand(4).isKill();
1219
1220 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1221 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1222 MF->insert(It, TrueBB);
1223 MF->insert(It, EndBB);
1224
1225 // Transfer rest of current basic-block to EndBB
1226 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1227 MBB->end());
1228 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1229
1230 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1231 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1232 MBB->addSuccessor(TrueBB);
1233 MBB->addSuccessor(EndBB);
1234
1235 // TrueBB falls through to the end.
1236 TrueBB->addSuccessor(EndBB);
1237
1238 if (!NZCVKilled) {
1239 TrueBB->addLiveIn(AArch64::NZCV);
1240 EndBB->addLiveIn(AArch64::NZCV);
1241 }
1242
1243 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1244 .addReg(IfTrueReg)
1245 .addMBB(TrueBB)
1246 .addReg(IfFalseReg)
1247 .addMBB(MBB);
1248
1249 MI.eraseFromParent();
1250 return EndBB;
1251}
1252
1253MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1254 MachineInstr &MI, MachineBasicBlock *BB) const {
1255 switch (MI.getOpcode()) {
1256 default:
1257#ifndef NDEBUG
1258 MI.dump();
1259#endif
1260 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-6.0~svn315928/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1260)
;
1261
1262 case AArch64::F128CSEL:
1263 return EmitF128CSEL(MI, BB);
1264
1265 case TargetOpcode::STACKMAP:
1266 case TargetOpcode::PATCHPOINT:
1267 return emitPatchPoint(MI, BB);
1268 }
1269}
1270
1271//===----------------------------------------------------------------------===//
1272// AArch64 Lowering private implementation.
1273//===----------------------------------------------------------------------===//
1274
1275//===----------------------------------------------------------------------===//
1276// Lowering Code
1277//===----------------------------------------------------------------------===//
1278
1279/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1280/// CC
1281static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1282 switch (CC) {
1283 default:
1284 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-6.0~svn315928/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1284)
;
1285 case ISD::SETNE:
1286 return AArch64CC::NE;
1287 case ISD::SETEQ:
1288 return AArch64CC::EQ;
1289 case ISD::SETGT:
1290 return AArch64CC::GT;
1291 case ISD::SETGE:
1292 return AArch64CC::GE;
1293 case ISD::SETLT:
1294 return AArch64CC::LT;
1295 case ISD::SETLE:
1296 return AArch64CC::LE;
1297 case ISD::SETUGT:
1298 return AArch64CC::HI;
1299 case ISD::SETUGE:
1300 return AArch64CC::HS;
1301 case ISD::SETULT:
1302 return AArch64CC::LO;
1303 case ISD::SETULE:
1304 return AArch64CC::LS;
1305 }
1306}
1307
1308/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1309static void changeFPCCToAArch64CC(ISD::CondCode CC,
1310 AArch64CC::CondCode &CondCode,
1311 AArch64CC::CondCode &CondCode2) {
1312 CondCode2 = AArch64CC::AL;
1313 switch (CC) {
1314 default:
1315 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-6.0~svn315928/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1315)
;
1316 case ISD::SETEQ:
1317 case ISD::SETOEQ:
1318 CondCode = AArch64CC::EQ;
1319 break;
1320 case ISD::SETGT:
1321 case ISD::SETOGT:
1322 CondCode = AArch64CC::GT;
1323 break;
1324 case ISD::SETGE:
1325 case ISD::SETOGE:
1326 CondCode = AArch64CC::GE;
1327 break;
1328 case ISD::SETOLT:
1329 CondCode = AArch64CC::MI;
1330 break;
1331 case ISD::SETOLE:
1332 CondCode = AArch64CC::LS;
1333 break;
1334 case ISD::SETONE:
1335 CondCode = AArch64CC::MI;
1336 CondCode2 = AArch64CC::GT;
1337 break;
1338 case ISD::SETO:
1339 CondCode = AArch64CC::VC;
1340 break;
1341 case ISD::SETUO:
1342 CondCode = AArch64CC::VS;
1343 break;
1344 case ISD::SETUEQ:
1345 CondCode = AArch64CC::EQ;
1346 CondCode2 = AArch64CC::VS;
1347 break;
1348 case ISD::SETUGT:
1349 CondCode = AArch64CC::HI;
1350 break;
1351 case ISD::SETUGE:
1352 CondCode = AArch64CC::PL;
1353 break;
1354 case ISD::SETLT:
1355 case ISD::SETULT:
1356 CondCode = AArch64CC::LT;
1357 break;
1358 case ISD::SETLE:
1359 case ISD::SETULE:
1360 CondCode = AArch64CC::LE;
1361 break;
1362 case ISD::SETNE:
1363 case ISD::SETUNE:
1364 CondCode = AArch64CC::NE;
1365 break;
1366 }
1367}
1368
1369/// Convert a DAG fp condition code to an AArch64 CC.
1370/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1371/// should be AND'ed instead of OR'ed.
1372static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1373 AArch64CC::CondCode &CondCode,
1374 AArch64CC::CondCode &CondCode2) {
1375 CondCode2 = AArch64CC::AL;
1376 switch (CC) {
1377 default:
1378 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1379 assert(CondCode2 == AArch64CC::AL);
1380 break;
1381 case ISD::SETONE:
1382 // (a one b)
1383 // == ((a olt b) || (a ogt b))
1384 // == ((a ord b) && (a une b))
1385 CondCode = AArch64CC::VC;
1386 CondCode2 = AArch64CC::NE;
1387 break;
1388 case ISD::SETUEQ:
1389 // (a ueq b)
1390 // == ((a uno b) || (a oeq b))
1391 // == ((a ule b) && (a uge b))
1392 CondCode = AArch64CC::PL;
1393 CondCode2 = AArch64CC::LE;
1394 break;
1395 }
1396}
1397
1398/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1399/// CC usable with the vector instructions. Fewer operations are available
1400/// without a real NZCV register, so we have to use less efficient combinations
1401/// to get the same effect.
1402static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1403 AArch64CC::CondCode &CondCode,
1404 AArch64CC::CondCode &CondCode2,
1405 bool &Invert) {
1406 Invert = false;
1407 switch (CC) {
1408 default:
1409 // Mostly the scalar mappings work fine.
1410 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1411 break;
1412 case ISD::SETUO:
1413 Invert = true;
1414 LLVM_FALLTHROUGH[[clang::fallthrough]];
1415 case ISD::SETO:
1416 CondCode = AArch64CC::MI;
1417 CondCode2 = AArch64CC::GE;
1418 break;
1419 case ISD::SETUEQ:
1420 case ISD::SETULT:
1421 case ISD::SETULE:
1422 case ISD::SETUGT:
1423 case ISD::SETUGE:
1424 // All of the compare-mask comparisons are ordered, but we can switch
1425 // between the two by a double inversion. E.g. ULE == !OGT.
1426 Invert = true;
1427 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1428 break;
1429 }
1430}
1431
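A scalar illustration of the double-inversion trick above (a sketch with plain C++ floats rather than compare-mask nodes): the unordered "<=" can be produced as the logical NOT of the ordered ">", because a NaN operand makes ">" false and therefore "ule" true.

#include <cassert>
#include <cmath>

// ULE == !OGT: "a <= b or unordered" is exactly the negation of the ordered "a > b".
static bool ule(float a, float b) { return !(a > b); }

int main() {
  assert(ule(1.0f, 2.0f));            // ordered, 1 <= 2
  assert(!ule(3.0f, 2.0f));           // ordered, 3 > 2
  assert(ule(std::nanf(""), 2.0f));   // unordered operand -> ule holds
}
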
1432static bool isLegalArithImmed(uint64_t C) {
1433 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1434 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1435 DEBUG(dbgs() << "Is imm " << C << " legal: " << (IsLegal ? "yes\n" : "no\n"))do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-lower")) { dbgs() << "Is imm " << C <<
" legal: " << (IsLegal ? "yes\n" : "no\n"); } } while (
false)
;
1436 return IsLegal;
1437}
1438
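A standalone sketch of the immediate rule checked above, exercised on a few sample values: a constant is encodable when it fits in 12 bits, or is a 12-bit value shifted left by 12.

#include <cassert>
#include <cstdint>

static bool isLegalArithImmedSketch(uint64_t C) {
  // Same test as the function above: low 12 bits, or 12 bits shifted by 12.
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

int main() {
  assert(isLegalArithImmedSketch(4095));        // 0xFFF: fits in 12 bits
  assert(isLegalArithImmedSketch(0x1000));      // 4096 = 1 << 12: shifted form
  assert(!isLegalArithImmedSketch(0x1001));     // needs bits in both halves
  assert(isLegalArithImmedSketch(0xFFF000));    // 0xFFF << 12
  assert(!isLegalArithImmedSketch(0x1000000));  // too wide even when shifted
}
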
1439static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1440 const SDLoc &dl, SelectionDAG &DAG) {
1441 EVT VT = LHS.getValueType();
1442 const bool FullFP16 =
1443 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1444
1445 if (VT.isFloatingPoint()) {
1446 assert(VT != MVT::f128);
1447 if (VT == MVT::f16 && !FullFP16) {
1448 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1449 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1450 VT = MVT::f32;
1451 }
1452 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1453 }
1454
1455 // The CMP instruction is just an alias for SUBS, and representing it as
1456 // SUBS means that it's possible to get CSE with subtract operations.
1457 // A later phase can perform the optimization of setting the destination
1458 // register to WZR/XZR if it ends up being unused.
1459 unsigned Opcode = AArch64ISD::SUBS;
1460
1461 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
1462 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1463 // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
1464 // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1465 // can be set differently by this operation. It comes down to whether
1466 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1467 // everything is fine. If not then the optimization is wrong. Thus general
1468 // comparisons are only valid if op2 != 0.
1469
1470 // So, finally, the only LLVM-native comparisons that don't mention C and V
1471 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1472 // the absence of information about op2.
1473 Opcode = AArch64ISD::ADDS;
1474 RHS = RHS.getOperand(1);
1475 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1476 !isUnsignedIntSetCC(CC)) {
1477 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1478 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1479 // of the signed comparisons.
1480 Opcode = AArch64ISD::ANDS;
1481 RHS = LHS.getOperand(1);
1482 LHS = LHS.getOperand(0);
1483 }
1484
1485 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1486 .getValue(1);
1487}
1488
1489/// \defgroup AArch64CCMP CMP;CCMP matching
1490///
1491/// These functions deal with the formation of CMP;CCMP;... sequences.
1492/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1493/// a comparison. They set the NZCV flags to a predefined value if their
1494/// predicate is false. This allows arbitrary conjunctions to be expressed, for
1495/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1496/// expressed as:
1497/// cmp A
1498/// ccmp B, inv(CB), CA
1499/// check for CB flags
1500///
1501/// In general we can create code for arbitrary "... (and (and A B) C)"
1502/// sequences. We can also implement some "or" expressions, because "(or A B)"
1503/// is equivalent to "not (and (not A) (not B))" and we can implement some
1504/// negation operations:
1505/// We can negate the results of a single comparison by inverting the flags
1506/// used when the predicate fails and inverting the flags tested in the next
1507/// instruction; We can also negate the results of the whole previous
1508/// conditional compare sequence by inverting the flags tested in the next
1509/// instruction. However there is no way to negate the result of a partial
1510/// sequence.
1511///
1512/// Therefore on encountering an "or" expression we can negate the subtree on
1513/// one side and have to be able to push the negate to the leaves of the subtree
1514/// on the other side (see also the comments in code). As a complete example:
1515/// "or (or (setCA (cmp A)) (setCB (cmp B)))
1516/// (and (setCC (cmp C)) (setCD (cmp D)))"
1517/// is transformed to
1518/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1519/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1520/// and implemented as:
1521/// cmp C
1522/// ccmp D, inv(CD), CC
1523/// ccmp A, CA, inv(CD)
1524/// ccmp B, CB, inv(CA)
1525/// check for CB flags
1526/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1527/// by conditional compare sequences.
1528/// @{
1529
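As a concrete instance of the scheme described above, a conjunction of two integer compares is normally emitted as one CMP followed by one CCMP instead of two compares plus a boolean AND. The sketch below assumes the AAPCS argument registers w0/w1; the exact registers and condition codes depend on the surrounding code.

// C++ source:
bool bothHold(int a, int b) { return a == 0 && b == 5; }

// Expected shape of the AArch64 lowering (shown as a comment for illustration):
//   cmp  w0, #0           // NZCV from "a == 0"
//   ccmp w1, #5, #0, eq   // if eq held: NZCV from "b == 5"; else NZCV := 0000, so eq fails
//   cset w0, eq
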
1530/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1531static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1532 ISD::CondCode CC, SDValue CCOp,
1533 AArch64CC::CondCode Predicate,
1534 AArch64CC::CondCode OutCC,
1535 const SDLoc &DL, SelectionDAG &DAG) {
1536 unsigned Opcode = 0;
1537 const bool FullFP16 =
1538 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1539
1540 if (LHS.getValueType().isFloatingPoint()) {
1541 assert(LHS.getValueType() != MVT::f128);
1542 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1543 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1544 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1545 }
1546 Opcode = AArch64ISD::FCCMP;
1547 } else if (RHS.getOpcode() == ISD::SUB) {
1548 SDValue SubOp0 = RHS.getOperand(0);
1549 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1550 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1551 Opcode = AArch64ISD::CCMN;
1552 RHS = RHS.getOperand(1);
1553 }
1554 }
1555 if (Opcode == 0)
1556 Opcode = AArch64ISD::CCMP;
1557
1558 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1559 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1560 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1561 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1562 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1563}
1564
1565/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1566/// CanNegate is set to true if we can push a negate operation through
1567/// the tree in a way that we are left with AND operations and negate operations
1568/// at the leaves only, i.e. "not (or (or x y) z)" can be changed to
1569/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1570/// brought into such a form.
1571static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
1572 unsigned Depth = 0) {
1573 if (!Val.hasOneUse())
1574 return false;
1575 unsigned Opcode = Val->getOpcode();
1576 if (Opcode == ISD::SETCC) {
1577 if (Val->getOperand(0).getValueType() == MVT::f128)
1578 return false;
1579 CanNegate = true;
1580 return true;
1581 }
1582 // Protect against exponential runtime and stack overflow.
1583 if (Depth > 6)
1584 return false;
1585 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1586 SDValue O0 = Val->getOperand(0);
1587 SDValue O1 = Val->getOperand(1);
1588 bool CanNegateL;
1589 if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1))
1590 return false;
1591 bool CanNegateR;
1592 if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1))
1593 return false;
1594
1595 if (Opcode == ISD::OR) {
1596 // For an OR expression we need to be able to negate at least one side or
1597 // we cannot do the transformation at all.
1598 if (!CanNegateL && !CanNegateR)
1599 return false;
1600 // We can however change a (not (or x y)) to (and (not x) (not y)) if we
1601 // can negate the x and y subtrees.
1602 CanNegate = CanNegateL && CanNegateR;
1603 } else {
1604 // If the operands are OR expressions then we finally need to negate their
1605 // outputs; we can only do that for the operand that is emitted last, by
1606 // negating OutCC, not for both operands.
1607 bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1608 bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1609 if (NeedsNegOutL && NeedsNegOutR)
1610 return false;
1611 // We cannot negate an AND operation (it would become an OR),
1612 CanNegate = false;
1613 }
1614 return true;
1615 }
1616 return false;
1617}
1618
1619/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1620/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1621/// Tries to transform the given i1 producing node @p Val to a series of compare
1622/// and conditional compare operations. @returns an NZCV flags producing node
1623/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1624/// the transformation was not possible.
1625/// On recursive invocations @p Negate may be set to true to have negation
1626/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
1627/// for the comparisons in the current subtree; the recursion depth is bounded
1628/// by isConjunctionDisjunctionTree() to avoid stack overflow.
1629static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val,
1630 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1631 AArch64CC::CondCode Predicate) {
1632 // We're at a tree leaf, produce a conditional comparison operation.
1633 unsigned Opcode = Val->getOpcode();
1634 if (Opcode == ISD::SETCC) {
1635 SDValue LHS = Val->getOperand(0);
1636 SDValue RHS = Val->getOperand(1);
1637 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1638 bool isInteger = LHS.getValueType().isInteger();
1639 if (Negate)
1640 CC = getSetCCInverse(CC, isInteger);
1641 SDLoc DL(Val);
1642 // Determine OutCC and handle FP special case.
1643 if (isInteger) {
1644 OutCC = changeIntCCToAArch64CC(CC);
1645 } else {
1646 assert(LHS.getValueType().isFloatingPoint());
1647 AArch64CC::CondCode ExtraCC;
1648 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1649 // Some floating point conditions can't be tested with a single condition
1650 // code. Construct an additional comparison in this case.
1651 if (ExtraCC != AArch64CC::AL) {
1652 SDValue ExtraCmp;
1653 if (!CCOp.getNode())
1654 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1655 else
1656 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1657 ExtraCC, DL, DAG);
1658 CCOp = ExtraCmp;
1659 Predicate = ExtraCC;
1660 }
1661 }
1662
1663 // Produce a normal comparison if we are first in the chain
1664 if (!CCOp)
1665 return emitComparison(LHS, RHS, CC, DL, DAG);
1666 // Otherwise produce a ccmp.
1667 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1668 DAG);
1669 }
1670 assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1671 "Valid conjunction/disjunction tree");
1672
1673 // Check if both sides can be transformed.
1674 SDValue LHS = Val->getOperand(0);
1675 SDValue RHS = Val->getOperand(1);
1676
1677 // In case of an OR we need to negate our operands and the result.
1678 // (A v B) <=> not(not(A) ^ not(B))
1679 bool NegateOpsAndResult = Opcode == ISD::OR;
1680 // We can negate the results of all previous operations by inverting the
1681 // predicate flags giving us a free negation for one side. The other side
1682 // must be negatable by itself.
1683 if (NegateOpsAndResult) {
1684 // See which side we can negate.
1685 bool CanNegateL;
1686 bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
1687 assert(isValidL && "Valid conjunction/disjunction tree");
1688 (void)isValidL;
1689
1690#ifndef NDEBUG
1691 bool CanNegateR;
1692 bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
1693 assert(isValidR && "Valid conjunction/disjunction tree");
1694 assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1695#endif
1696
1697 // Order the side which we cannot negate to RHS so we can emit it first.
1698 if (!CanNegateL)
1699 std::swap(LHS, RHS);
1700 } else {
1701 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1702 assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1703 "Valid conjunction/disjunction tree");
1704 // Order the side where we need to negate the output flags to RHS so it
1705 // gets emitted first.
1706 if (NeedsNegOutL)
1707 std::swap(LHS, RHS);
1708 }
1709
1710 // Emit RHS. If we want to negate the tree we only need to push a negate
1711 // through if we are already in a Negate case, otherwise we can negate
1712 // the "flags to test" afterwards.
1713 AArch64CC::CondCode RHSCC;
1714 SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
1715 CCOp, Predicate);
1716 if (NegateOpsAndResult && !Negate)
1717 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1718 // Emit LHS. We may need to negate it.
1719 SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
1720 NegateOpsAndResult, CmpR,
1721 RHSCC);
1722 // If we transformed an OR to an AND then we have to negate the result
1723 // (or absorb the Negate parameter).
1724 if (NegateOpsAndResult && !Negate)
1725 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1726 return CmpL;
1727}
1728
1729/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1730/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1731/// \see emitConjunctionDisjunctionTreeRec().
1732static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
1733 AArch64CC::CondCode &OutCC) {
1734 bool CanNegate;
1735 if (!isConjunctionDisjunctionTree(Val, CanNegate))
1736 return SDValue();
1737
1738 return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(),
1739 AArch64CC::AL);
1740}
1741
1742/// @}
1743
1744static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1745 SDValue &AArch64cc, SelectionDAG &DAG,
1746 const SDLoc &dl) {
1747 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1748 EVT VT = RHS.getValueType();
1749 uint64_t C = RHSC->getZExtValue();
1750 if (!isLegalArithImmed(C)) {
1751 // Constant does not fit, try adjusting it by one?
1752 switch (CC) {
1753 default:
1754 break;
1755 case ISD::SETLT:
1756 case ISD::SETGE:
1757 if ((VT == MVT::i32 && C != 0x80000000 &&
1758 isLegalArithImmed((uint32_t)(C - 1))) ||
1759 (VT == MVT::i64 && C != 0x80000000ULL &&
1760 isLegalArithImmed(C - 1ULL))) {
1761 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1762 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1763 RHS = DAG.getConstant(C, dl, VT);
1764 }
1765 break;
1766 case ISD::SETULT:
1767 case ISD::SETUGE:
1768 if ((VT == MVT::i32 && C != 0 &&
1769 isLegalArithImmed((uint32_t)(C - 1))) ||
1770 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1771 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1772 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1773 RHS = DAG.getConstant(C, dl, VT);
1774 }
1775 break;
1776 case ISD::SETLE:
1777 case ISD::SETGT:
1778 if ((VT == MVT::i32 && C != INT32_MAX(2147483647) &&
1779 isLegalArithImmed((uint32_t)(C + 1))) ||
1780 (VT == MVT::i64 && C != INT64_MAX(9223372036854775807L) &&
1781 isLegalArithImmed(C + 1ULL))) {
1782 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1783 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1784 RHS = DAG.getConstant(C, dl, VT);
1785 }
1786 break;
1787 case ISD::SETULE:
1788 case ISD::SETUGT:
1789 if ((VT == MVT::i32 && C != UINT32_MAX(4294967295U) &&
1790 isLegalArithImmed((uint32_t)(C + 1))) ||
1791 (VT == MVT::i64 && C != UINT64_MAX(18446744073709551615UL) &&
1792 isLegalArithImmed(C + 1ULL))) {
1793 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1794 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1795 RHS = DAG.getConstant(C, dl, VT);
1796 }
1797 break;
1798 }
1799 }
1800 }
1801 SDValue Cmp;
1802 AArch64CC::CondCode AArch64CC;
1803 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1804 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1805
1806 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1807 // For the i8 operand, the largest immediate is 255, so this can be easily
1808 // encoded in the compare instruction. For the i16 operand, however, the
1809 // largest immediate cannot be encoded in the compare.
1810 // Therefore, use a sign extending load and cmn to avoid materializing the
1811 // -1 constant. For example,
1812 // movz w1, #65535
1813 // ldrh w0, [x0, #0]
1814 // cmp w0, w1
1815 // >
1816 // ldrsh w0, [x0, #0]
1817 // cmn w0, #1
1818 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1819 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1820 // ensure both the LHS and RHS are truly zero extended and to make sure the
1821 // transformation is profitable.
1822 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1823 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1824 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1825 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1826 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1827 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1828 SDValue SExt =
1829 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1830 DAG.getValueType(MVT::i16));
1831 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1832 RHS.getValueType()),
1833 CC, dl, DAG);
1834 AArch64CC = changeIntCCToAArch64CC(CC);
1835 }
1836 }
1837
1838 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1839 if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1840 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1841 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1842 }
1843 }
1844 }
1845
1846 if (!Cmp) {
1847 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1848 AArch64CC = changeIntCCToAArch64CC(CC);
1849 }
1850 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1851 return Cmp;
1852}
1853
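The constant-adjustment step above is just an algebraic identity on the predicate; for example 0x1001 is not a legal arithmetic immediate, but the same comparison can be expressed with the encodable constant 0x1000. A sketch in ordinary C++, not DAG code:

#include <cassert>
#include <cstdint>
#include <initializer_list>

// "x < 0x1001" and "x <= 0x1000" agree for every uint32_t x, so the backend is
// free to pick whichever constant the SUBS immediate encoding accepts.
static bool lessThan(uint32_t x)  { return x < 0x1001u; }
static bool lessEqual(uint32_t x) { return x <= 0x1000u; }

int main() {
  for (uint32_t x : {0u, 0xFFFu, 0x1000u, 0x1001u, 0xFFFFFFFFu})
    assert(lessThan(x) == lessEqual(x));
}
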
1854static std::pair<SDValue, SDValue>
1855getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1856 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1857 "Unsupported value type");
1858 SDValue Value, Overflow;
1859 SDLoc DL(Op);
1860 SDValue LHS = Op.getOperand(0);
1861 SDValue RHS = Op.getOperand(1);
1862 unsigned Opc = 0;
1863 switch (Op.getOpcode()) {
1864 default:
1865 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-6.0~svn315928/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1865)
;
1866 case ISD::SADDO:
1867 Opc = AArch64ISD::ADDS;
1868 CC = AArch64CC::VS;
1869 break;
1870 case ISD::UADDO:
1871 Opc = AArch64ISD::ADDS;
1872 CC = AArch64CC::HS;
1873 break;
1874 case ISD::SSUBO:
1875 Opc = AArch64ISD::SUBS;
1876 CC = AArch64CC::VS;
1877 break;
1878 case ISD::USUBO:
1879 Opc = AArch64ISD::SUBS;
1880 CC = AArch64CC::LO;
1881 break;
1882 // Multiply needs a little bit extra work.
1883 case ISD::SMULO:
1884 case ISD::UMULO: {
1885 CC = AArch64CC::NE;
1886 bool IsSigned = Op.getOpcode() == ISD::SMULO;
1887 if (Op.getValueType() == MVT::i32) {
1888 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1889 // For a 32 bit multiply with overflow check we want the instruction
1890 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1891 // need to generate the following pattern:
1892 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1893 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1894 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1895 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1896 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1897 DAG.getConstant(0, DL, MVT::i64));
1898 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1899 // operation. We need to clear out the upper 32 bits, because we used a
1900 // widening multiply that wrote all 64 bits. In the end this should be a
1901 // noop.
1902 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1903 if (IsSigned) {
1904 // The signed overflow check requires more than just a simple check for
1905 // any bit set in the upper 32 bits of the result. These bits could be
1906 // just the sign bits of a negative number. To perform the overflow
1907 // check we arithmetically shift the lower 32 bits of the result right by
1908 // 31 bits and compare that sign pattern with the upper 32 bits.
1909 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1910 DAG.getConstant(32, DL, MVT::i64));
1911 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1912 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1913 DAG.getConstant(31, DL, MVT::i64));
1914 // It is important that LowerBits is last, otherwise the arithmetic
1915 // shift will not be folded into the compare (SUBS).
1916 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1917 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1918 .getValue(1);
1919 } else {
1920 // The overflow check for unsigned multiply is easy. We only need to
1921 // check if any of the upper 32 bits are set. This can be done with a
1922 // CMP (shifted register). For that we need to generate the following
1923 // pattern:
1924 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
1925 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1926 DAG.getConstant(32, DL, MVT::i64));
1927 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1928 Overflow =
1929 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1930 DAG.getConstant(0, DL, MVT::i64),
1931 UpperBits).getValue(1);
1932 }
1933 break;
1934 }
1935 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1936 // For the 64 bit multiply
1937 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1938 if (IsSigned) {
1939 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1940 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1941 DAG.getConstant(63, DL, MVT::i64));
1942 // It is important that LowerBits is last, otherwise the arithmetic
1943 // shift will not be folded into the compare (SUBS).
1944 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1945 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1946 .getValue(1);
1947 } else {
1948 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1949 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1950 Overflow =
1951 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1952 DAG.getConstant(0, DL, MVT::i64),
1953 UpperBits).getValue(1);
1954 }
1955 break;
1956 }
1957 } // switch (...)
1958
1959 if (Opc) {
1960 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1961
1962 // Emit the AArch64 operation with overflow check.
1963 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1964 Overflow = Value.getValue(1);
1965 }
1966 return std::make_pair(Value, Overflow);
1967}
1968
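The 32-bit SMULO/UMULO checks built above correspond to the following portable sketch, with plain C++ arithmetic standing in for the widening multiply and the SUBS comparison (the narrowing cast and the arithmetic right shift of a signed value are assumed to behave as on AArch64, i.e. two's complement):

#include <cassert>
#include <cstdint>

static bool smulOverflows(int32_t a, int32_t b) {
  int64_t wide = static_cast<int64_t>(a) * static_cast<int64_t>(b);
  int32_t lower = static_cast<int32_t>(wide);        // the 32-bit result
  int32_t upper = static_cast<int32_t>(wide >> 32);  // the discarded upper half
  // No overflow exactly when the upper half is the sign-extension of the lower
  // half, i.e. equals the lower half shifted right arithmetically by 31.
  return upper != (lower >> 31);
}

static bool umulOverflows(uint32_t a, uint32_t b) {
  uint64_t wide = static_cast<uint64_t>(a) * static_cast<uint64_t>(b);
  return (wide >> 32) != 0;  // any upper bit set means the product does not fit
}

int main() {
  assert(!smulOverflows(46340, 46341));   // 2147441940 fits in i32
  assert(smulOverflows(46341, 46341));    // 2147488281 does not
  assert(smulOverflows(-46341, 46341));   // -2147488281 does not either
  assert(!umulOverflows(65535u, 65537u)); // 0xFFFFFFFF fits in u32
  assert(umulOverflows(65536u, 65536u));  // 0x100000000 does not
}
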
1969SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1970 RTLIB::Libcall Call) const {
1971 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1972 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
1973}
1974
1975// Returns true if the given Op is the overflow flag result of an overflow
1976// intrinsic operation.
1977static bool isOverflowIntrOpRes(SDValue Op) {
1978 unsigned Opc = Op.getOpcode();
1979 return (Op.getResNo() == 1 &&
1980 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
1981 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
1982}
1983
1984static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
1985 SDValue Sel = Op.getOperand(0);
1986 SDValue Other = Op.getOperand(1);
1987 SDLoc dl(Sel);
1988
1989 // If the operand is an overflow checking operation, invert the condition
1990 // code and kill the Not operation. I.e., transform:
1991 // (xor (overflow_op_bool, 1))
1992 // -->
1993 // (csel 1, 0, invert(cc), overflow_op_bool)
1994 // ... which later gets transformed to just a cset instruction with an
1995 // inverted condition code, rather than a cset + eor sequence.
1996 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
1997 // Only lower legal XALUO ops.
1998 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
1999 return SDValue();
2000
2001 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2002 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2003 AArch64CC::CondCode CC;
2004 SDValue Value, Overflow;
2005 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2006 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2007 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2008 CCVal, Overflow);
2009 }
2010 // If neither operand is a SELECT_CC, give up.
2011 if (Sel.getOpcode() != ISD::SELECT_CC)
2012 std::swap(Sel, Other);
2013 if (Sel.getOpcode() != ISD::SELECT_CC)
2014 return Op;
2015
2016 // The folding we want to perform is:
2017 // (xor x, (select_cc a, b, cc, 0, -1) )
2018 // -->
2019 // (csel x, (xor x, -1), cc ...)
2020 //
2021 // The latter will get matched to a CSINV instruction.
2022
2023 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2024 SDValue LHS = Sel.getOperand(0);
2025 SDValue RHS = Sel.getOperand(1);
2026 SDValue TVal = Sel.getOperand(2);
2027 SDValue FVal = Sel.getOperand(3);
2028
2029 // FIXME: This could be generalized to non-integer comparisons.
2030 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2031 return Op;
2032
2033 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2034 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2035
2036 // The values aren't constants, this isn't the pattern we're looking for.
2037 if (!CFVal || !CTVal)
2038 return Op;
2039
2040 // We can commute the SELECT_CC by inverting the condition. This
2041 // might be needed to make this fit into a CSINV pattern.
2042 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2043 std::swap(TVal, FVal);
2044 std::swap(CTVal, CFVal);
2045 CC = ISD::getSetCCInverse(CC, true);
2046 }
2047
2048 // If the constants line up, perform the transform!
2049 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2050 SDValue CCVal;
2051 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2052
2053 FVal = Other;
2054 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2055 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2056
2057 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2058 CCVal, Cmp);
2059 }
2060
2061 return Op;
2062}
2063
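The select_cc/xor folding above has a simple source-level counterpart (a sketch): xor-ing with a 0/-1 mask chosen by a condition is the same as selecting between x and ~x, which is exactly what CSINV provides.

#include <cassert>
#include <initializer_list>

// Before the fold: xor with a mask produced by the select.
static long maskedXor(long x, bool cond) {
  long mask = cond ? 0 : -1;  // (select_cc ..., 0, -1)
  return x ^ mask;
}

// After the fold: select between x and ~x, a single CSINV on AArch64.
static long selectInvert(long x, bool cond) {
  return cond ? x : ~x;
}

int main() {
  for (long x : {0L, 1L, -5L, 123456789L})
    for (bool c : {false, true})
      assert(maskedXor(x, c) == selectInvert(x, c));
}
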
2064static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2065 EVT VT = Op.getValueType();
2066
2067 // Let legalize expand this if it isn't a legal type yet.
2068 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2069 return SDValue();
2070
2071 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2072
2073 unsigned Opc;
2074 bool ExtraOp = false;
2075 switch (Op.getOpcode()) {
2076 default:
2077 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-6.0~svn315928/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2077)
;
2078 case ISD::ADDC:
2079 Opc = AArch64ISD::ADDS;
2080 break;
2081 case ISD::SUBC:
2082 Opc = AArch64ISD::SUBS;
2083 break;
2084 case ISD::ADDE:
2085 Opc = AArch64ISD::ADCS;
2086 ExtraOp = true;
2087 break;
2088 case ISD::SUBE:
2089 Opc = AArch64ISD::SBCS;
2090 ExtraOp = true;
2091 break;
2092 }
2093
2094 if (!ExtraOp)
2095 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2096 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2097 Op.getOperand(2));
2098}
2099
2100static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2101 // Let legalize expand this if it isn't a legal type yet.
2102 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2103 return SDValue();
2104
2105 SDLoc dl(Op);
2106 AArch64CC::CondCode CC;
2107 // The actual operation that sets the overflow or carry flag.
2108 SDValue Value, Overflow;
2109 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2110
2111 // We use 0 and 1 as false and true values.
2112 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2113 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2114
2115 // We use an inverted condition, because the conditional select is inverted
2116 // too. This will allow it to be selected to a single instruction:
2117 // CSINC Wd, WZR, WZR, invert(cond).
2118 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2119 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2120 CCVal, Overflow);
2121
2122 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2123 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2124}
2125
2126// Prefetch operands are:
2127// 1: Address to prefetch
2128// 2: bool isWrite
2129// 3: int locality (0 = no locality ... 3 = extreme locality)
2130// 4: bool isDataCache
2131static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2132 SDLoc DL(Op);
2133 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2134 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2135 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2136
2137 bool IsStream = !Locality;
2138 // When the locality number is set
2139 if (Locality) {
2140 // The front-end should have filtered out the out-of-range values
2141 assert(Locality <= 3 && "Prefetch locality out-of-range");
2142 // The locality hint runs in the opposite direction to the cache-level
2143 // encoding, so flip the number:
2144 // the encoding starts at 0 for level 1 (L1).
2145 Locality = 3 - Locality;
2146 }
2147
2148 // Build the mask value encoding the expected behavior.
2149 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2150 (!IsData << 3) | // IsDataCache bit
2151 (Locality << 1) | // Cache level bits
2152 (unsigned)IsStream; // Stream bit
2153 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2154 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2155}
2156
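Evaluating the prfop encoding built above for a few __builtin_prefetch-style inputs (a sketch; the PLDL1KEEP-style names are read off the bit layout in the comments, not taken from the instruction tables):

#include <cassert>

static unsigned encodePrfOp(bool IsWrite, unsigned Locality, bool IsData) {
  bool IsStream = Locality == 0;
  if (Locality)
    Locality = 3 - Locality;  // locality 3 -> L1, locality 1 -> L3
  return (IsWrite << 4) | (!IsData << 3) | (Locality << 1) | (unsigned)IsStream;
}

int main() {
  assert(encodePrfOp(false, 3, true) == 0x00);  // read, L1, keep      -> PLDL1KEEP
  assert(encodePrfOp(false, 0, true) == 0x01);  // read, L1, streaming -> PLDL1STRM
  assert(encodePrfOp(true, 1, true) == 0x14);   // write, L3, keep     -> PSTL3KEEP
}
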
2157SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2158 SelectionDAG &DAG) const {
2159 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2160
2161 RTLIB::Libcall LC;
2162 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2163
2164 return LowerF128Call(Op, DAG, LC);
2165}
2166
2167SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2168 SelectionDAG &DAG) const {
2169 if (Op.getOperand(0).getValueType() != MVT::f128) {
2170 // It's legal except when f128 is involved
2171 return Op;
2172 }
2173
2174 RTLIB::Libcall LC;
2175 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2176
2177 // FP_ROUND node has a second operand indicating whether it is known to be
2178 // precise. That doesn't take part in the LibCall so we can't directly use
2179 // LowerF128Call.
2180 SDValue SrcVal = Op.getOperand(0);
2181 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2182 SDLoc(Op)).first;
2183}
2184
2185static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2186 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2187 // Any additional optimization in this function should be recorded
2188 // in the cost tables.
2189 EVT InVT = Op.getOperand(0).getValueType();
2190 EVT VT = Op.getValueType();
2191 unsigned NumElts = InVT.getVectorNumElements();
2192
2193 // f16 vectors are promoted to f32 before a conversion.
2194 if (InVT.getVectorElementType() == MVT::f16) {
2195 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2196 SDLoc dl(Op);
2197 return DAG.getNode(
2198 Op.getOpcode(), dl, Op.getValueType(),
2199 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2200 }
2201
2202 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2203 SDLoc dl(Op);
2204 SDValue Cv =
2205 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2206 Op.getOperand(0));
2207 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2208 }
2209
2210 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2211 SDLoc dl(Op);
2212 MVT ExtVT =
2213 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2214 VT.getVectorNumElements());
2215 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2216 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2217 }
2218
2219 // Type changing conversions are illegal.
2220 return Op;
2221}
2222
2223SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2224 SelectionDAG &DAG) const {
2225 if (Op.getOperand(0).getValueType().isVector())
2226 return LowerVectorFP_TO_INT(Op, DAG);
2227
2228 // f16 conversions are promoted to f32 when full fp16 is not supported.
2229 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2230 !Subtarget->hasFullFP16()) {
2231 SDLoc dl(Op);
2232 return DAG.getNode(
2233 Op.getOpcode(), dl, Op.getValueType(),
2234 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2235 }
2236
2237 if (Op.getOperand(0).getValueType() != MVT::f128) {
2238 // It's legal except when f128 is involved
2239 return Op;
2240 }
2241
2242 RTLIB::Libcall LC;
2243 if (Op.getOpcode() == ISD::FP_TO_SINT)
2244 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2245 else
2246 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2247
2248 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2249 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2250}
2251
2252static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2253 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2254 // Any additional optimization in this function should be recorded
2255 // in the cost tables.
2256 EVT VT = Op.getValueType();
2257 SDLoc dl(Op);
2258 SDValue In = Op.getOperand(0);
2259 EVT InVT = In.getValueType();
2260
2261 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2262 MVT CastVT =
2263 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2264 InVT.getVectorNumElements());
2265 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2266 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2267 }
2268
2269 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2270 unsigned CastOpc =
2271 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2272 EVT CastVT = VT.changeVectorElementTypeToInteger();
2273 In = DAG.getNode(CastOpc, dl, CastVT, In);
2274 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2275 }
2276
2277 return Op;
2278}
2279
2280SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2281 SelectionDAG &DAG) const {
2282 if (Op.getValueType().isVector())
2283 return LowerVectorINT_TO_FP(Op, DAG);
2284
2285 // f16 conversions are promoted to f32 when full fp16 is not supported.
2286 if (Op.getValueType() == MVT::f16 &&
2287 !Subtarget->hasFullFP16()) {
2288 SDLoc dl(Op);
2289 return DAG.getNode(
2290 ISD::FP_ROUND, dl, MVT::f16,
2291 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2292 DAG.getIntPtrConstant(0, dl));
2293 }
2294
2295 // i128 conversions are libcalls.
2296 if (Op.getOperand(0).getValueType() == MVT::i128)
2297 return SDValue();
2298
2299 // Other conversions are legal, unless it's to the completely software-based
2300 // fp128.
2301 if (Op.getValueType() != MVT::f128)
2302 return Op;
2303
2304 RTLIB::Libcall LC;
2305 if (Op.getOpcode() == ISD::SINT_TO_FP)
2306 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2307 else
2308 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2309
2310 return LowerF128Call(Op, DAG, LC);
2311}
2312
2313SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2314 SelectionDAG &DAG) const {
2315 // For iOS, we want to call an alternative entry point: __sincos_stret,
2316 // which returns the values in two S / D registers.
2317 SDLoc dl(Op);
2318 SDValue Arg = Op.getOperand(0);
2319 EVT ArgVT = Arg.getValueType();
2320 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2321
2322 ArgListTy Args;
2323 ArgListEntry Entry;
2324
2325 Entry.Node = Arg;
2326 Entry.Ty = ArgTy;
2327 Entry.IsSExt = false;
2328 Entry.IsZExt = false;
2329 Args.push_back(Entry);
2330
2331 const char *LibcallName =
2332 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
2333 SDValue Callee =
2334 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2335
2336 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2337 TargetLowering::CallLoweringInfo CLI(DAG);
2338 CLI.setDebugLoc(dl)
2339 .setChain(DAG.getEntryNode())
2340 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2341
2342 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2343 return CallResult.first;
2344}
2345
2346static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2347 if (Op.getValueType() != MVT::f16)
2348 return SDValue();
2349
2350 assert(Op.getOperand(0).getValueType() == MVT::i16);
2351 SDLoc DL(Op);
2352
2353 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2354 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2355 return SDValue(
2356 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2357 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2358 0);
2359}
2360
2361static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2362 if (OrigVT.getSizeInBits() >= 64)
2363 return OrigVT;
2364
2365 assert(OrigVT.isSimple() && "Expecting a simple value type");
2366
2367 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2368 switch (OrigSimpleTy) {
2369 default: llvm_unreachable("Unexpected Vector Type")::llvm::llvm_unreachable_internal("Unexpected Vector Type", "/build/llvm-toolchain-snapshot-6.0~svn315928/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2369)
;
2370 case MVT::v2i8:
2371 case MVT::v2i16:
2372 return MVT::v2i32;
2373 case MVT::v4i8:
2374 return MVT::v4i16;
2375 }
2376}
2377
2378static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2379 const EVT &OrigTy,
2380 const EVT &ExtTy,
2381 unsigned ExtOpcode) {
2382 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2383 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2384 // 64-bits we need to insert a new extension so that it will be 64-bits.
2385 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2386 if (OrigTy.getSizeInBits() >= 64)
2387 return N;
2388
2389 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2390 EVT NewVT = getExtensionTo64Bits(OrigTy);
2391
2392 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2393}
2394
2395static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2396 bool isSigned) {
2397 EVT VT = N->getValueType(0);
2398
2399 if (N->getOpcode() != ISD::BUILD_VECTOR)
2400 return false;
2401
2402 for (const SDValue &Elt : N->op_values()) {
2403 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2404 unsigned EltSize = VT.getScalarSizeInBits();
2405 unsigned HalfSize = EltSize / 2;
2406 if (isSigned) {
2407 if (!isIntN(HalfSize, C->getSExtValue()))
2408 return false;
2409 } else {
2410 if (!isUIntN(HalfSize, C->getZExtValue()))
2411 return false;
2412 }
2413 continue;
2414 }
2415 return false;
2416 }
2417
2418 return true;
2419}
2420
2421static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2422 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2423 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2424 N->getOperand(0)->getValueType(0),
2425 N->getValueType(0),
2426 N->getOpcode());
2427
2428 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2429 EVT VT = N->getValueType(0);
2430 SDLoc dl(N);
2431 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2432 unsigned NumElts = VT.getVectorNumElements();
2433 MVT TruncVT = MVT::getIntegerVT(EltSize);
2434 SmallVector<SDValue, 8> Ops;
2435 for (unsigned i = 0; i != NumElts; ++i) {
2436 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2437 const APInt &CInt = C->getAPIntValue();
2438 // Element types smaller than 32 bits are not legal, so use i32 elements.
2439 // The values are implicitly truncated so sext vs. zext doesn't matter.
2440 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2441 }
2442 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2443}
2444
2445static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2446 return N->getOpcode() == ISD::SIGN_EXTEND ||
2447 isExtendedBUILD_VECTOR(N, DAG, true);
2448}
2449
2450static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2451 return N->getOpcode() == ISD::ZERO_EXTEND ||
2452 isExtendedBUILD_VECTOR(N, DAG, false);
2453}
2454
2455static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2456 unsigned Opcode = N->getOpcode();
2457 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2458 SDNode *N0 = N->getOperand(0).getNode();
2459 SDNode *N1 = N->getOperand(1).getNode();
2460 return N0->hasOneUse() && N1->hasOneUse() &&
2461 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2462 }
2463 return false;
2464}
2465
2466static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2467 unsigned Opcode = N->getOpcode();
2468 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2469 SDNode *N0 = N->getOperand(0).getNode();
2470 SDNode *N1 = N->getOperand(1).getNode();
2471 return N0->hasOneUse() && N1->hasOneUse() &&
2472 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2473 }
2474 return false;
2475}
2476
2477static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2478 // Multiplications are only custom-lowered for 128-bit vectors so that
2479 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2480 EVT VT = Op.getValueType();
2481 assert(VT.is128BitVector() && VT.isInteger() &&
2482 "unexpected type for custom-lowering ISD::MUL");
2483 SDNode *N0 = Op.getOperand(0).getNode();
2484 SDNode *N1 = Op.getOperand(1).getNode();
2485 unsigned NewOpc = 0;
2486 bool isMLA = false;
2487 bool isN0SExt = isSignExtended(N0, DAG);
2488 bool isN1SExt = isSignExtended(N1, DAG);
2489 if (isN0SExt && isN1SExt)
2490 NewOpc = AArch64ISD::SMULL;
2491 else {
2492 bool isN0ZExt = isZeroExtended(N0, DAG);
2493 bool isN1ZExt = isZeroExtended(N1, DAG);
2494 if (isN0ZExt && isN1ZExt)
2495 NewOpc = AArch64ISD::UMULL;
2496 else if (isN1SExt || isN1ZExt) {
2497 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2498 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2499 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2500 NewOpc = AArch64ISD::SMULL;
2501 isMLA = true;
2502 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2503 NewOpc = AArch64ISD::UMULL;
2504 isMLA = true;
2505 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2506 std::swap(N0, N1);
2507 NewOpc = AArch64ISD::UMULL;
2508 isMLA = true;
2509 }
2510 }
2511
2512 if (!NewOpc) {
2513 if (VT == MVT::v2i64)
2514 // Fall through to expand this. It is not legal.
2515 return SDValue();
2516 else
2517 // Other vector multiplications are legal.
2518 return Op;
2519 }
2520 }
2521
2522 // Legalize to a S/UMULL instruction
2523 SDLoc DL(Op);
2524 SDValue Op0;
2525 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2526 if (!isMLA) {
2527 Op0 = skipExtensionForVectorMULL(N0, DAG);
2528 assert(Op0.getValueType().is64BitVector() &&
2529 Op1.getValueType().is64BitVector() &&
2530 "unexpected types for extended operands to VMULL");
2531 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2532 }
2533 // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
2534 // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
2535 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
2536 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2537 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2538 EVT Op1VT = Op1.getValueType();
2539 return DAG.getNode(N0->getOpcode(), DL, VT,
2540 DAG.getNode(NewOpc, DL, VT,
2541 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2542 DAG.getNode(NewOpc, DL, VT,
2543 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2544}
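// A rough sketch of what the MLA path above produces (operand names are
// illustrative, not taken from the source): for a 128-bit multiply whose first
// operand is an add of two sign-extended 64-bit vectors,
//   mul (add (sext A), (sext B)), (sext C)
// is rewritten as
//   add (SMULL A, C), (SMULL B, C)
// so each multiply maps onto the extending SMULL form and the trailing add can
// be accumulated on cores with multiply-accumulate forwarding.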
2545
2546SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2547 SelectionDAG &DAG) const {
2548 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2549 SDLoc dl(Op);
2550 switch (IntNo) {
2551 default: return SDValue(); // Don't custom lower most intrinsics.
2552 case Intrinsic::thread_pointer: {
2553 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2554 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2555 }
2556 case Intrinsic::aarch64_neon_abs:
2557 return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
2558 Op.getOperand(1));
2559 case Intrinsic::aarch64_neon_smax:
2560 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2561 Op.getOperand(1), Op.getOperand(2));
2562 case Intrinsic::aarch64_neon_umax:
2563 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2564 Op.getOperand(1), Op.getOperand(2));
2565 case Intrinsic::aarch64_neon_smin:
2566 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2567 Op.getOperand(1), Op.getOperand(2));
2568 case Intrinsic::aarch64_neon_umin:
2569 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2570 Op.getOperand(1), Op.getOperand(2));
2571 }
2572}
2573
2574SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2575 SelectionDAG &DAG) const {
2576 DEBUG(dbgs() << "Custom lowering: ");
2577 DEBUG(Op.dump());
2578
2579 switch (Op.getOpcode()) {
2580 default:
2581 llvm_unreachable("unimplemented operand");
2582 return SDValue();
2583 case ISD::BITCAST:
2584 return LowerBITCAST(Op, DAG);
2585 case ISD::GlobalAddress:
2586 return LowerGlobalAddress(Op, DAG);
2587 case ISD::GlobalTLSAddress:
2588 return LowerGlobalTLSAddress(Op, DAG);
2589 case ISD::SETCC:
2590 return LowerSETCC(Op, DAG);
2591 case ISD::BR_CC:
2592 return LowerBR_CC(Op, DAG);
2593 case ISD::SELECT:
2594 return LowerSELECT(Op, DAG);
2595 case ISD::SELECT_CC:
2596 return LowerSELECT_CC(Op, DAG);
2597 case ISD::JumpTable:
2598 return LowerJumpTable(Op, DAG);
2599 case ISD::ConstantPool:
2600 return LowerConstantPool(Op, DAG);
2601 case ISD::BlockAddress:
2602 return LowerBlockAddress(Op, DAG);
2603 case ISD::VASTART:
2604 return LowerVASTART(Op, DAG);
2605 case ISD::VACOPY:
2606 return LowerVACOPY(Op, DAG);
2607 case ISD::VAARG:
2608 return LowerVAARG(Op, DAG);
2609 case ISD::ADDC:
2610 case ISD::ADDE:
2611 case ISD::SUBC:
2612 case ISD::SUBE:
2613 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2614 case ISD::SADDO:
2615 case ISD::UADDO:
2616 case ISD::SSUBO:
2617 case ISD::USUBO:
2618 case ISD::SMULO:
2619 case ISD::UMULO:
2620 return LowerXALUO(Op, DAG);
2621 case ISD::FADD:
2622 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2623 case ISD::FSUB:
2624 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2625 case ISD::FMUL:
2626 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2627 case ISD::FDIV:
2628 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2629 case ISD::FP_ROUND:
2630 return LowerFP_ROUND(Op, DAG);
2631 case ISD::FP_EXTEND:
2632 return LowerFP_EXTEND(Op, DAG);
2633 case ISD::FRAMEADDR:
2634 return LowerFRAMEADDR(Op, DAG);
2635 case ISD::RETURNADDR:
2636 return LowerRETURNADDR(Op, DAG);
2637 case ISD::INSERT_VECTOR_ELT:
2638 return LowerINSERT_VECTOR_ELT(Op, DAG);
2639 case ISD::EXTRACT_VECTOR_ELT:
2640 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2641 case ISD::BUILD_VECTOR:
2642 return LowerBUILD_VECTOR(Op, DAG);
2643 case ISD::VECTOR_SHUFFLE:
2644 return LowerVECTOR_SHUFFLE(Op, DAG);
2645 case ISD::EXTRACT_SUBVECTOR:
2646 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2647 case ISD::SRA:
2648 case ISD::SRL:
2649 case ISD::SHL:
2650 return LowerVectorSRA_SRL_SHL(Op, DAG);
2651 case ISD::SHL_PARTS:
2652 return LowerShiftLeftParts(Op, DAG);
2653 case ISD::SRL_PARTS:
2654 case ISD::SRA_PARTS:
2655 return LowerShiftRightParts(Op, DAG);
2656 case ISD::CTPOP:
2657 return LowerCTPOP(Op, DAG);
2658 case ISD::FCOPYSIGN:
2659 return LowerFCOPYSIGN(Op, DAG);
2660 case ISD::AND:
2661 return LowerVectorAND(Op, DAG);
2662 case ISD::OR:
2663 return LowerVectorOR(Op, DAG);
2664 case ISD::XOR:
2665 return LowerXOR(Op, DAG);
2666 case ISD::PREFETCH:
2667 return LowerPREFETCH(Op, DAG);
2668 case ISD::SINT_TO_FP:
2669 case ISD::UINT_TO_FP:
2670 return LowerINT_TO_FP(Op, DAG);
2671 case ISD::FP_TO_SINT:
2672 case ISD::FP_TO_UINT:
2673 return LowerFP_TO_INT(Op, DAG);
2674 case ISD::FSINCOS:
2675 return LowerFSINCOS(Op, DAG);
2676 case ISD::MUL:
2677 return LowerMUL(Op, DAG);
2678 case ISD::INTRINSIC_WO_CHAIN:
2679 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2680 case ISD::VECREDUCE_ADD:
2681 case ISD::VECREDUCE_SMAX:
2682 case ISD::VECREDUCE_SMIN:
2683 case ISD::VECREDUCE_UMAX:
2684 case ISD::VECREDUCE_UMIN:
2685 case ISD::VECREDUCE_FMAX:
2686 case ISD::VECREDUCE_FMIN:
2687 return LowerVECREDUCE(Op, DAG);
2688 }
2689}
2690
2691//===----------------------------------------------------------------------===//
2692// Calling Convention Implementation
2693//===----------------------------------------------------------------------===//
2694
2695#include "AArch64GenCallingConv.inc"
2696
2697/// Selects the correct CCAssignFn for a given CallingConvention value.
2698CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2699 bool IsVarArg) const {
2700 switch (CC) {
2701 default:
2702 report_fatal_error("Unsupported calling convention.");
2703 case CallingConv::WebKit_JS:
2704 return CC_AArch64_WebKit_JS;
2705 case CallingConv::GHC:
2706 return CC_AArch64_GHC;
2707 case CallingConv::C:
2708 case CallingConv::Fast:
2709 case CallingConv::PreserveMost:
2710 case CallingConv::CXX_FAST_TLS:
2711 case CallingConv::Swift:
2712 if (Subtarget->isTargetWindows() && IsVarArg)
2713 return CC_AArch64_Win64_VarArg;
2714 if (!Subtarget->isTargetDarwin())
2715 return CC_AArch64_AAPCS;
2716 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2717 case CallingConv::Win64:
2718 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
2719 }
2720}
2721
2722CCAssignFn *
2723AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2724 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2725 : RetCC_AArch64_AAPCS;
2726}
2727
2728SDValue AArch64TargetLowering::LowerFormalArguments(
2729 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2730 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2731 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2732 MachineFunction &MF = DAG.getMachineFunction();
2733 MachineFrameInfo &MFI = MF.getFrameInfo();
2734 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
2735
2736 // Assign locations to all of the incoming arguments.
2737 SmallVector<CCValAssign, 16> ArgLocs;
2738 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2739 *DAG.getContext());
2740
2741 // At this point, Ins[].VT may already be promoted to i32. To correctly
2742 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2743 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2744 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2745 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2746 // LocVT.
2747 unsigned NumArgs = Ins.size();
2748 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
2749 unsigned CurArgIdx = 0;
2750 for (unsigned i = 0; i != NumArgs; ++i) {
2751 MVT ValVT = Ins[i].VT;
2752 if (Ins[i].isOrigArg()) {
2753 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2754 CurArgIdx = Ins[i].getOrigArgIndex();
2755
2756 // Get type of the original argument.
2757 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2758 /*AllowUnknown*/ true);
2759 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2760 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2761 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2762 ValVT = MVT::i8;
2763 else if (ActualMVT == MVT::i16)
2764 ValVT = MVT::i16;
2765 }
2766 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2767 bool Res =
2768 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2769 assert(!Res && "Call operand has unhandled type");
2770 (void)Res;
2771 }
2772 assert(ArgLocs.size() == Ins.size());
2773 SmallVector<SDValue, 16> ArgValues;
2774 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2775 CCValAssign &VA = ArgLocs[i];
2776
2777 if (Ins[i].Flags.isByVal()) {
2778 // Byval is used for HFAs in the PCS, but the system should work in a
2779 // non-compliant manner for larger structs.
2780 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2781 int Size = Ins[i].Flags.getByValSize();
2782 unsigned NumRegs = (Size + 7) / 8;
2783
2784 // FIXME: This works on big-endian for composite byvals, which are the common
2785 // case. It should also work for fundamental types too.
2786 unsigned FrameIdx =
2787 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2788 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2789 InVals.push_back(FrameIdxN);
2790
2791 continue;
2792 }
2793
2794 if (VA.isRegLoc()) {
2795 // Arguments stored in registers.
2796 EVT RegVT = VA.getLocVT();
2797
2798 SDValue ArgValue;
2799 const TargetRegisterClass *RC;
2800
2801 if (RegVT == MVT::i32)
2802 RC = &AArch64::GPR32RegClass;
2803 else if (RegVT == MVT::i64)
2804 RC = &AArch64::GPR64RegClass;
2805 else if (RegVT == MVT::f16)
2806 RC = &AArch64::FPR16RegClass;
2807 else if (RegVT == MVT::f32)
2808 RC = &AArch64::FPR32RegClass;
2809 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2810 RC = &AArch64::FPR64RegClass;
2811 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2812 RC = &AArch64::FPR128RegClass;
2813 else
2814 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2815
2816 // Transform the arguments in physical registers into virtual ones.
2817 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2818 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2819
2820 // If this is an 8, 16 or 32-bit value, it is really passed promoted
2821 // to 64 bits. Insert an assert[sz]ext to capture this, then
2822 // truncate to the right size.
2823 switch (VA.getLocInfo()) {
2824 default:
2825 llvm_unreachable("Unknown loc info!");
2826 case CCValAssign::Full:
2827 break;
2828 case CCValAssign::BCvt:
2829 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
2830 break;
2831 case CCValAssign::AExt:
2832 case CCValAssign::SExt:
2833 case CCValAssign::ZExt:
2834 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
2835 // nodes after our lowering.
2836 assert(RegVT == Ins[i].VT && "incorrect register location selected");
2837 break;
2838 }
2839
2840 InVals.push_back(ArgValue);
2841
2842 } else { // VA.isRegLoc()
2843 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
2844 unsigned ArgOffset = VA.getLocMemOffset();
2845 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
2846
2847 uint32_t BEAlign = 0;
2848 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
2849 !Ins[i].Flags.isInConsecutiveRegs())
2850 BEAlign = 8 - ArgSize;
2851
2852 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
2853
2854 // Create load nodes to retrieve arguments from the stack.
2855 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2856 SDValue ArgValue;
2857
2858 // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
2859 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
2860 MVT MemVT = VA.getValVT();
2861
2862 switch (VA.getLocInfo()) {
2863 default:
2864 break;
2865 case CCValAssign::BCvt:
2866 MemVT = VA.getLocVT();
2867 break;
2868 case CCValAssign::SExt:
2869 ExtType = ISD::SEXTLOAD;
2870 break;
2871 case CCValAssign::ZExt:
2872 ExtType = ISD::ZEXTLOAD;
2873 break;
2874 case CCValAssign::AExt:
2875 ExtType = ISD::EXTLOAD;
2876 break;
2877 }
2878
2879 ArgValue = DAG.getExtLoad(
2880 ExtType, DL, VA.getLocVT(), Chain, FIN,
2881 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
2882 MemVT);
2883
2884 InVals.push_back(ArgValue);
2885 }
2886 }
2887
2888 // varargs
2889 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2890 if (isVarArg) {
2891 if (!Subtarget->isTargetDarwin() || IsWin64) {
2892 // The AAPCS variadic function ABI is identical to the non-variadic
2893 // one. As a result there may be more arguments in registers and we should
2894 // save them for future reference.
2895 // Win64 variadic functions also pass arguments in registers, but all float
2896 // arguments are passed in integer registers.
2897 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
2898 }
2899
2900 // This will point to the next argument passed via stack.
2901 unsigned StackOffset = CCInfo.getNextStackOffset();
2902 // We currently pass all varargs at 8-byte alignment.
2903 StackOffset = ((StackOffset + 7) & ~7);
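// (Illustrative arithmetic for the rounding above: an incoming offset of 12
// becomes ((12 + 7) & ~7) == 16, i.e. the next 8-byte boundary.)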
2904 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
2905 }
2906
2907 unsigned StackArgSize = CCInfo.getNextStackOffset();
2908 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2909 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
2910 // This is a non-standard ABI so by fiat I say we're allowed to make full
2911 // use of the stack area to be popped, which must be aligned to 16 bytes in
2912 // any case:
2913 StackArgSize = alignTo(StackArgSize, 16);
2914
2915 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
2916 // a multiple of 16.
2917 FuncInfo->setArgumentStackToRestore(StackArgSize);
2918
2919 // This realignment carries over to the available bytes below. Our own
2920 // callers will guarantee the space is free by giving an aligned value to
2921 // CALLSEQ_START.
2922 }
2923 // Even if we're not expected to free up the space, it's useful to know how
2924 // much is there while considering tail calls (because we can reuse it).
2925 FuncInfo->setBytesInStackArgArea(StackArgSize);
2926
2927 return Chain;
2928}
2929
2930void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
2931 SelectionDAG &DAG,
2932 const SDLoc &DL,
2933 SDValue &Chain) const {
2934 MachineFunction &MF = DAG.getMachineFunction();
2935 MachineFrameInfo &MFI = MF.getFrameInfo();
2936 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2937 auto PtrVT = getPointerTy(DAG.getDataLayout());
2938 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv());
2939
2940 SmallVector<SDValue, 8> MemOps;
2941
2942 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
2943 AArch64::X3, AArch64::X4, AArch64::X5,
2944 AArch64::X6, AArch64::X7 };
2945 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
2946 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
2947
2948 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
2949 int GPRIdx = 0;
2950 if (GPRSaveSize != 0) {
2951 if (IsWin64) {
2952 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
2953 if (GPRSaveSize & 15)
2954 // The extra size here, if triggered, will always be 8.
2955 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
2956 } else
2957 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
2958
2959 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
2960
2961 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
2962 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
2963 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
2964 SDValue Store = DAG.getStore(
2965 Val.getValue(1), DL, Val, FIN,
2966 IsWin64
2967 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
2968 GPRIdx,
2969 (i - FirstVariadicGPR) * 8)
2970 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
2971 MemOps.push_back(Store);
2972 FIN =
2973 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
2974 }
2975 }
2976 FuncInfo->setVarArgsGPRIndex(GPRIdx);
2977 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
2978
2979 if (Subtarget->hasFPARMv8() && !IsWin64) {
2980 static const MCPhysReg FPRArgRegs[] = {
2981 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
2982 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
2983 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
2984 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
2985
2986 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
2987 int FPRIdx = 0;
2988 if (FPRSaveSize != 0) {
2989 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
2990
2991 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
2992
2993 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
2994 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
2995 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
2996
2997 SDValue Store = DAG.getStore(
2998 Val.getValue(1), DL, Val, FIN,
2999 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3000 MemOps.push_back(Store);
3001 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3002 DAG.getConstant(16, DL, PtrVT));
3003 }
3004 }
3005 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3006 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3007 }
3008
3009 if (!MemOps.empty()) {
3010 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3011 }
3012}
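// A worked example for the save-area sizing above, with assumed numbers: a
// variadic function that names two integer parameters leaves X2..X7
// unallocated, so FirstVariadicGPR == 2, GPRSaveSize == 8 * (8 - 2) == 48, and
// the loop stores X2..X7 into that 48-byte area at 8-byte strides. The FPR
// path works the same way with 16-byte Q registers.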
3013
3014/// LowerCallResult - Lower the result values of a call into the
3015/// appropriate copies out of appropriate physical registers.
3016SDValue AArch64TargetLowering::LowerCallResult(
3017 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3018 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3019 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3020 SDValue ThisVal) const {
3021 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3022 ? RetCC_AArch64_WebKit_JS
3023 : RetCC_AArch64_AAPCS;
3024 // Assign locations to each value returned by this call.
3025 SmallVector<CCValAssign, 16> RVLocs;
3026 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3027 *DAG.getContext());
3028 CCInfo.AnalyzeCallResult(Ins, RetCC);
3029
3030 // Copy all of the result registers out of their specified physreg.
3031 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3032 CCValAssign VA = RVLocs[i];
3033
3034 // Pass 'this' value directly from the argument to return value, to avoid
3035 // reg unit interference
3036 if (i == 0 && isThisReturn) {
3037 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3038        "unexpected return calling convention register assignment");
3039 InVals.push_back(ThisVal);
3040 continue;
3041 }
3042
3043 SDValue Val =
3044 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3045 Chain = Val.getValue(1);
3046 InFlag = Val.getValue(2);
3047
3048 switch (VA.getLocInfo()) {
3049 default:
3050 llvm_unreachable("Unknown loc info!");
3051 case CCValAssign::Full:
3052 break;
3053 case CCValAssign::BCvt:
3054 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3055 break;
3056 }
3057
3058 InVals.push_back(Val);
3059 }
3060
3061 return Chain;
3062}
3063
3064/// Return true if the calling convention is one that we can guarantee TCO for.
3065static bool canGuaranteeTCO(CallingConv::ID CC) {
3066 return CC == CallingConv::Fast;
3067}
3068
3069/// Return true if we might ever do TCO for calls with this calling convention.
3070static bool mayTailCallThisCC(CallingConv::ID CC) {
3071 switch (CC) {
3072 case CallingConv::C:
3073 case CallingConv::PreserveMost:
3074 case CallingConv::Swift:
3075 return true;
3076 default:
3077 return canGuaranteeTCO(CC);
3078 }
3079}
3080
3081bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3082 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3083 const SmallVectorImpl<ISD::OutputArg> &Outs,
3084 const SmallVectorImpl<SDValue> &OutVals,
3085 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3086 if (!mayTailCallThisCC(CalleeCC))
3087 return false;
3088
3089 MachineFunction &MF = DAG.getMachineFunction();
3090 const Function *CallerF = MF.getFunction();
3091 CallingConv::ID CallerCC = CallerF->getCallingConv();
3092 bool CCMatch = CallerCC == CalleeCC;
3093
3094 // Byval parameters hand the function a pointer directly into the stack area
3095 // we want to reuse during a tail call. Working around this *is* possible (see
3096 // X86) but less efficient and uglier in LowerCall.
3097 for (Function::const_arg_iterator i = CallerF->arg_begin(),
3098 e = CallerF->arg_end();
3099 i != e; ++i)
3100 if (i->hasByValAttr())
3101 return false;
3102
3103 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3104 return canGuaranteeTCO(CalleeCC) && CCMatch;
3105
3106 // Externally-defined functions with weak linkage should not be
3107 // tail-called on AArch64 when the OS does not support dynamic
3108 // pre-emption of symbols, as the AAELF spec requires normal calls
3109 // to undefined weak functions to be replaced with a NOP or jump to the
3110 // next instruction. The behaviour of branch instructions in this
3111 // situation (as used for tail calls) is implementation-defined, so we
3112 // cannot rely on the linker replacing the tail call with a return.
3113 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3114 const GlobalValue *GV = G->getGlobal();
3115 const Triple &TT = getTargetMachine().getTargetTriple();
3116 if (GV->hasExternalWeakLinkage() &&
3117 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3118 return false;
3119 }
3120
3121 // Now we search for cases where we can use a tail call without changing the
3122 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3123 // concept.
3124
3125 // I want anyone implementing a new calling convention to think long and hard
3126 // about this assert.
3127 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3128        "Unexpected variadic calling convention");
3129
3130 LLVMContext &C = *DAG.getContext();
3131 if (isVarArg && !Outs.empty()) {
3132 // At least two cases here: if caller is fastcc then we can't have any
3133 // memory arguments (we'd be expected to clean up the stack afterwards). If
3134 // caller is C then we could potentially use its argument area.
3135
3136 // FIXME: for now we take the most conservative of these in both cases:
3137 // disallow all variadic memory operands.
3138 SmallVector<CCValAssign, 16> ArgLocs;
3139 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3140
3141 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3142 for (const CCValAssign &ArgLoc : ArgLocs)
3143 if (!ArgLoc.isRegLoc())
3144 return false;
3145 }
3146
3147 // Check that the call results are passed in the same way.
3148 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3149 CCAssignFnForCall(CalleeCC, isVarArg),
3150 CCAssignFnForCall(CallerCC, isVarArg)))
3151 return false;
3152 // The callee has to preserve all registers the caller needs to preserve.
3153 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3154 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3155 if (!CCMatch) {
3156 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3157 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3158 return false;
3159 }
3160
3161 // Nothing more to check if the callee is taking no arguments
3162 if (Outs.empty())
3163 return true;
3164
3165 SmallVector<CCValAssign, 16> ArgLocs;
3166 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3167
3168 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3169
3170 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3171
3172 // If the stack arguments for this call do not fit into our own save area then
3173 // the call cannot be made tail.
3174 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3175 return false;
3176
3177 const MachineRegisterInfo &MRI = MF.getRegInfo();
3178 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3179 return false;
3180
3181 return true;
3182}
3183
3184SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3185 SelectionDAG &DAG,
3186 MachineFrameInfo &MFI,
3187 int ClobberedFI) const {
3188 SmallVector<SDValue, 8> ArgChains;
3189 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3190 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3191
3192 // Include the original chain at the beginning of the list. When this is
3193 // used by target LowerCall hooks, this helps legalize find the
3194 // CALLSEQ_BEGIN node.
3195 ArgChains.push_back(Chain);
3196
3197 // Add a chain value for each stack argument corresponding
3198 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3199 UE = DAG.getEntryNode().getNode()->use_end();
3200 U != UE; ++U)
3201 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3202 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3203 if (FI->getIndex() < 0) {
3204 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3205 int64_t InLastByte = InFirstByte;
3206 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3207
3208 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3209 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3210 ArgChains.push_back(SDValue(L, 1));
3211 }
3212
3213 // Build a tokenfactor for all the chains.
3214 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3215}
3216
3217bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3218 bool TailCallOpt) const {
3219 return CallCC == CallingConv::Fast && TailCallOpt;
3220}
3221
3222/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3223/// and add input and output parameter nodes.
3224SDValue
3225AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3226 SmallVectorImpl<SDValue> &InVals) const {
3227 SelectionDAG &DAG = CLI.DAG;
3228 SDLoc &DL = CLI.DL;
3229 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3230 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3231 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3232 SDValue Chain = CLI.Chain;
3233 SDValue Callee = CLI.Callee;
3234 bool &IsTailCall = CLI.IsTailCall;
3235 CallingConv::ID CallConv = CLI.CallConv;
3236 bool IsVarArg = CLI.IsVarArg;
3237
3238 MachineFunction &MF = DAG.getMachineFunction();
3239 bool IsThisReturn = false;
3240
3241 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3242 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3243 bool IsSibCall = false;
3244
3245 if (IsTailCall) {
3246 // Check if it's really possible to do a tail call.
3247 IsTailCall = isEligibleForTailCallOptimization(
3248 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3249 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3250 report_fatal_error("failed to perform tail call elimination on a call "
3251 "site marked musttail");
3252
3253 // A sibling call is one where we're under the usual C ABI and not planning
3254 // to change that but can still do a tail call:
3255 if (!TailCallOpt && IsTailCall)
3256 IsSibCall = true;
3257
3258 if (IsTailCall)
3259 ++NumTailCalls;
3260 }
3261
3262 // Analyze operands of the call, assigning locations to each operand.
3263 SmallVector<CCValAssign, 16> ArgLocs;
3264 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3265 *DAG.getContext());
3266
3267 if (IsVarArg) {
3268 // Handle fixed and variable vector arguments differently.
3269 // Variable vector arguments always go into memory.
3270 unsigned NumArgs = Outs.size();
3271
3272 for (unsigned i = 0; i != NumArgs; ++i) {
3273 MVT ArgVT = Outs[i].VT;
3274 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3275 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3276 /*IsVarArg=*/ !Outs[i].IsFixed);
3277 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3278 assert(!Res && "Call operand has unhandled type");
3279 (void)Res;
3280 }
3281 } else {
3282 // At this point, Outs[].VT may already be promoted to i32. To correctly
3283 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3284 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3285 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3286 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3287 // LocVT.
3288 unsigned NumArgs = Outs.size();
3289 for (unsigned i = 0; i != NumArgs; ++i) {
3290 MVT ValVT = Outs[i].VT;
3291 // Get type of the original argument.
3292 EVT ActualVT = getValueType(DAG.getDataLayout(),
3293 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3294 /*AllowUnknown*/ true);
3295 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3296 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3297 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3298 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3299 ValVT = MVT::i8;
3300 else if (ActualMVT == MVT::i16)
3301 ValVT = MVT::i16;
3302
3303 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3304 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3305 assert(!Res && "Call operand has unhandled type");
3306 (void)Res;
3307 }
3308 }
3309
3310 // Get a count of how many bytes are to be pushed on the stack.
3311 unsigned NumBytes = CCInfo.getNextStackOffset();
3312
3313 if (IsSibCall) {
3314 // Since we're not changing the ABI to make this a tail call, the memory
3315 // operands are already available in the caller's incoming argument space.
3316 NumBytes = 0;
3317 }
3318
3319 // FPDiff is the byte offset of the call's argument area from the callee's.
3320 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3321 // by this amount for a tail call. In a sibling call it must be 0 because the
3322 // caller will deallocate the entire stack and the callee still expects its
3323 // arguments to begin at SP+0. Completely unused for non-tail calls.
3324 int FPDiff = 0;
3325
3326 if (IsTailCall && !IsSibCall) {
3327 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3328
3329 // Since callee will pop argument stack as a tail call, we must keep the
3330 // popped size 16-byte aligned.
3331 NumBytes = alignTo(NumBytes, 16);
3332
3333 // FPDiff will be negative if this tail call requires more space than we
3334 // would automatically have in our incoming argument space. Positive if we
3335 // can actually shrink the stack.
3336 FPDiff = NumReusableBytes - NumBytes;
3337
3338 // The stack pointer must be 16-byte aligned at all times it's used for a
3339 // memory operation, which in practice means at *all* times and in
3340 // particular across call boundaries. Therefore our own arguments started at
3341 // a 16-byte aligned SP and the delta applied for the tail call should
3342 // satisfy the same constraint.
3343 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3344 }
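// Illustrative FPDiff numbers (assumed, not from the source): if the caller
// was entered with a 32-byte stack-argument area but this tail call needs 48
// bytes after 16-byte rounding, FPDiff == 32 - 48 == -16, meaning the callee's
// arguments start 16 bytes below where the caller's own arguments began.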
3345
3346 // Adjust the stack pointer for the new arguments...
3347 // These operations are automatically eliminated by the prolog/epilog pass
3348 if (!IsSibCall)
3349 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3350
3351 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3352 getPointerTy(DAG.getDataLayout()));
3353
3354 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3355 SmallVector<SDValue, 8> MemOpChains;
3356 auto PtrVT = getPointerTy(DAG.getDataLayout());
3357
3358 // Walk the register/memloc assignments, inserting copies/loads.
3359 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3360 ++i, ++realArgIdx) {
3361 CCValAssign &VA = ArgLocs[i];
3362 SDValue Arg = OutVals[realArgIdx];
3363 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3364
3365 // Promote the value if needed.
3366 switch (VA.getLocInfo()) {
3367 default:
3368 llvm_unreachable("Unknown loc info!");
3369 case CCValAssign::Full:
3370 break;
3371 case CCValAssign::SExt:
3372 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3373 break;
3374 case CCValAssign::ZExt:
3375 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3376 break;
3377 case CCValAssign::AExt:
3378 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3379 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3380 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3381 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3382 }
3383 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3384 break;
3385 case CCValAssign::BCvt:
3386 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3387 break;
3388 case CCValAssign::FPExt:
3389 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3390 break;
3391 }
3392
3393 if (VA.isRegLoc()) {
3394 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3395 Outs[0].VT == MVT::i64) {
3396 assert(VA.getLocVT() == MVT::i64 &&
3397        "unexpected calling convention register assignment");
3398 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3399        "unexpected use of 'returned'");
3400 IsThisReturn = true;
3401 }
3402 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3403 } else {
3404 assert(VA.isMemLoc());
3405
3406 SDValue DstAddr;
3407 MachinePointerInfo DstInfo;
3408
3409 // FIXME: This works on big-endian for composite byvals, which are the
3410 // common case. It should also work for fundamental types too.
3411 uint32_t BEAlign = 0;
3412 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3413 : VA.getValVT().getSizeInBits();
3414 OpSize = (OpSize + 7) / 8;
3415 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3416 !Flags.isInConsecutiveRegs()) {
3417 if (OpSize < 8)
3418 BEAlign = 8 - OpSize;
3419 }
3420 unsigned LocMemOffset = VA.getLocMemOffset();
3421 int32_t Offset = LocMemOffset + BEAlign;
3422 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3423 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3424
3425 if (IsTailCall) {
3426 Offset = Offset + FPDiff;
3427 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3428
3429 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3430 DstInfo =
3431 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3432
3433 // Make sure any stack arguments overlapping with where we're storing
3434 // are loaded before this eventual operation. Otherwise they'll be
3435 // clobbered.
3436 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3437 } else {
3438 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3439
3440 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3441 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3442 LocMemOffset);
3443 }
3444
3445 if (Outs[i].Flags.isByVal()) {
3446 SDValue SizeNode =
3447 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3448 SDValue Cpy = DAG.getMemcpy(
3449 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3450 /*isVol = */ false, /*AlwaysInline = */ false,
3451 /*isTailCall = */ false,
3452 DstInfo, MachinePointerInfo());
3453
3454 MemOpChains.push_back(Cpy);
3455 } else {
3456 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3457 // promoted to a legal register type i32, we should truncate Arg back to
3458 // i1/i8/i16.
3459 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3460 VA.getValVT() == MVT::i16)
3461 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3462
3463 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3464 MemOpChains.push_back(Store);
3465 }
3466 }
3467 }
3468
3469 if (!MemOpChains.empty())
3470 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3471
3472 // Build a sequence of copy-to-reg nodes chained together with token chain
3473 // and flag operands which copy the outgoing args into the appropriate regs.
3474 SDValue InFlag;
3475 for (auto &RegToPass : RegsToPass) {
3476 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3477 RegToPass.second, InFlag);
3478 InFlag = Chain.getValue(1);
3479 }
3480
3481 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3482 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3483 // node so that legalize doesn't hack it.
3484 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3485 auto GV = G->getGlobal();
3486 if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3487 AArch64II::MO_GOT) {
3488 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3489 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3490 } else {
3491 const GlobalValue *GV = G->getGlobal();
3492 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3493 }
3494 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3495 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3496 Subtarget->isTargetMachO()) {
3497 const char *Sym = S->getSymbol();
3498 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3499 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3500 } else {
3501 const char *Sym = S->getSymbol();
3502 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3503 }
3504 }
3505
3506 // We don't usually want to end the call-sequence here because we would tidy
3507 // the frame up *after* the call, however in the ABI-changing tail-call case
3508 // we've carefully laid out the parameters so that when sp is reset they'll be
3509 // in the correct location.
3510 if (IsTailCall && !IsSibCall) {
3511 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3512 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3513 InFlag = Chain.getValue(1);
3514 }
3515
3516 std::vector<SDValue> Ops;
3517 Ops.push_back(Chain);
3518 Ops.push_back(Callee);
3519
3520 if (IsTailCall) {
3521 // Each tail call may have to adjust the stack by a different amount, so
3522 // this information must travel along with the operation for eventual
3523 // consumption by emitEpilogue.
3524 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3525 }
3526
3527 // Add argument registers to the end of the list so that they are known live
3528 // into the call.
3529 for (auto &RegToPass : RegsToPass)
3530 Ops.push_back(DAG.getRegister(RegToPass.first,
3531 RegToPass.second.getValueType()));
3532
3533 // Add a register mask operand representing the call-preserved registers.
3534 const uint32_t *Mask;
3535 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3536 if (IsThisReturn) {
3537 // For 'this' returns, use the X0-preserving mask if applicable
3538 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3539 if (!Mask) {
3540 IsThisReturn = false;
3541 Mask = TRI->getCallPreservedMask(MF, CallConv);
3542 }
3543 } else
3544 Mask = TRI->getCallPreservedMask(MF, CallConv);
3545
3546 assert(Mask && "Missing call preserved mask for calling convention");
3547 Ops.push_back(DAG.getRegisterMask(Mask));
3548
3549 if (InFlag.getNode())
3550 Ops.push_back(InFlag);
3551
3552 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3553
3554 // If we're doing a tail call, use a TC_RETURN here rather than an
3555 // actual call instruction.
3556 if (IsTailCall) {
3557 MF.getFrameInfo().setHasTailCall();
3558 return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3559 }
3560
3561 // Returns a chain and a flag for retval copy to use.
3562 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3563 InFlag = Chain.getValue(1);
3564
3565 uint64_t CalleePopBytes =
3566 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3567
3568 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3569 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3570 InFlag, DL);
3571 if (!Ins.empty())
3572 InFlag = Chain.getValue(1);
3573
3574 // Handle result values, copying them out of physregs into vregs that we
3575 // return.
3576 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3577 InVals, IsThisReturn,
3578 IsThisReturn ? OutVals[0] : SDValue());
3579}
3580
3581bool AArch64TargetLowering::CanLowerReturn(
3582 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3583 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3584 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3585 ? RetCC_AArch64_WebKit_JS
3586 : RetCC_AArch64_AAPCS;
3587 SmallVector<CCValAssign, 16> RVLocs;
3588 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3589 return CCInfo.CheckReturn(Outs, RetCC);
3590}
3591
3592SDValue
3593AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3594 bool isVarArg,
3595 const SmallVectorImpl<ISD::OutputArg> &Outs,
3596 const SmallVectorImpl<SDValue> &OutVals,
3597 const SDLoc &DL, SelectionDAG &DAG) const {
3598 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3599 ? RetCC_AArch64_WebKit_JS
3600 : RetCC_AArch64_AAPCS;
3601 SmallVector<CCValAssign, 16> RVLocs;
3602 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3603 *DAG.getContext());
3604 CCInfo.AnalyzeReturn(Outs, RetCC);
3605
3606 // Copy the result values into the output registers.
3607 SDValue Flag;
3608 SmallVector<SDValue, 4> RetOps(1, Chain);
3609 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3610 ++i, ++realRVLocIdx) {
3611 CCValAssign &VA = RVLocs[i];
3612 assert(VA.isRegLoc() && "Can only return in registers!");
3613 SDValue Arg = OutVals[realRVLocIdx];
3614
3615 switch (VA.getLocInfo()) {
3616 default:
3617 llvm_unreachable("Unknown loc info!");
3618 case CCValAssign::Full:
3619 if (Outs[i].ArgVT == MVT::i1) {
3620 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3621 // value. This is strictly redundant on Darwin (which uses "zeroext
3622 // i1"), but will be optimised out before ISel.
3623 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3624 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3625 }
3626 break;
3627 case CCValAssign::BCvt:
3628 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3629 break;
3630 }
3631
3632 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3633 Flag = Chain.getValue(1);
3634 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3635 }
3636 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3637 const MCPhysReg *I =
3638 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3639 if (I) {
3640 for (; *I; ++I) {
3641 if (AArch64::GPR64RegClass.contains(*I))
3642 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3643 else if (AArch64::FPR64RegClass.contains(*I))
3644 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3645 else
3646 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3647 }
3648 }
3649
3650 RetOps[0] = Chain; // Update chain.
3651
3652 // Add the flag if we have it.
3653 if (Flag.getNode())
3654 RetOps.push_back(Flag);
3655
3656 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3657}
3658
3659//===----------------------------------------------------------------------===//
3660// Other Lowering Code
3661//===----------------------------------------------------------------------===//
3662
3663SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3664 SelectionDAG &DAG,
3665 unsigned Flag) const {
3666 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
3667}
3668
3669SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3670 SelectionDAG &DAG,
3671 unsigned Flag) const {
3672 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3673}
3674
3675SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3676 SelectionDAG &DAG,
3677 unsigned Flag) const {
3678 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3679 N->getOffset(), Flag);
3680}
3681
3682SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
3683 SelectionDAG &DAG,
3684 unsigned Flag) const {
3685 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
3686}
3687
3688// (loadGOT sym)
3689template <class NodeTy>
3690SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG) const {
3691 DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
3692 SDLoc DL(N);
3693 EVT Ty = getPointerTy(DAG.getDataLayout());
3694 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT);
3695 // FIXME: Once remat is capable of dealing with instructions with register
3696 // operands, expand this into two nodes instead of using a wrapper node.
3697 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
3698}
3699
3700// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
3701template <class NodeTy>
3702SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG)
3703 const {
3704 DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
3705 SDLoc DL(N);
3706 EVT Ty = getPointerTy(DAG.getDataLayout());
3707 const unsigned char MO_NC = AArch64II::MO_NC;
3708 return DAG.getNode(
3709 AArch64ISD::WrapperLarge, DL, Ty,
3710 getTargetNode(N, Ty, DAG, AArch64II::MO_G3),
3711 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC),
3712 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC),
3713 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC));
3714}
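// Under the large code model the four-operand wrapper above is materialised as
// one movz plus three movk instructions, each filling one 16-bit chunk
// (g3..g0) of the 64-bit symbol address.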
3715
3716// (addlow (adrp %hi(sym)) %lo(sym))
3717template <class NodeTy>
3718SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG) const {
3719 DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
3720 SDLoc DL(N);
3721 EVT Ty = getPointerTy(DAG.getDataLayout());
3722 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE);
3723 SDValue Lo = getTargetNode(N, Ty, DAG,
3724 AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3725 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
3726 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
3727}
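// The ADRP/ADDlow pair built above corresponds to the usual small-code-model
// sequence (symbol name illustrative):
//   adrp x0, var              ; page address of 'var'
//   add  x0, x0, :lo12:var    ; plus the low 12 bits of its offset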
3728
3729SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
3730 SelectionDAG &DAG) const {
3731 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
3732 const GlobalValue *GV = GN->getGlobal();
3733 unsigned char OpFlags =
3734 Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
3735
3736 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
3737        "unexpected offset in global node");
3738
3739 // This also catches the large code model case for Darwin.
3740 if ((OpFlags & AArch64II::MO_GOT) != 0) {
3741 return getGOT(GN, DAG);
3742 }
3743
3744 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3745 return getAddrLarge(GN, DAG);
3746 } else {
3747 return getAddr(GN, DAG);
3748 }
3749}
3750
3751/// \brief Convert a TLS address reference into the correct sequence of loads
3752/// and calls to compute the variable's address (for Darwin, currently) and
3753/// return an SDValue containing the final node.
3754
3755/// Darwin only has one TLS scheme which must be capable of dealing with the
3756/// fully general situation, in the worst case. This means:
3757/// + "extern __thread" declaration.
3758/// + Defined in a possibly unknown dynamic library.
3759///
3760/// The general system is that each __thread variable has a [3 x i64] descriptor
3761/// which contains information used by the runtime to calculate the address. The
3762/// only part of this the compiler needs to know about is the first xword, which
3763/// contains a function pointer that must be called with the address of the
3764/// entire descriptor in "x0".
3765///
3766/// Since this descriptor may be in a different unit, in general even the
3767/// descriptor must be accessed via an indirect load. The "ideal" code sequence
3768/// is:
3769/// adrp x0, _var@TLVPPAGE
3770/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
3771/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
3772/// ; the function pointer
3773/// blr x1 ; Uses descriptor address in x0
3774/// ; Address of _var is now in x0.
3775///
3776/// If the address of _var's descriptor *is* known to the linker, then it can
3777/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
3778/// a slight efficiency gain.
3779SDValue
3780AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
3781 SelectionDAG &DAG) const {
3782 assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
3783
3784 SDLoc DL(Op);
3785 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3786 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3787
3788 SDValue TLVPAddr =
3789 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3790 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
3791
3792 // The first entry in the descriptor is a function pointer that we must call
3793 // to obtain the address of the variable.
3794 SDValue Chain = DAG.getEntryNode();
3795 SDValue FuncTLVGet = DAG.getLoad(
3796 MVT::i64, DL, Chain, DescAddr,
3797 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3798 /* Alignment = */ 8,
3799 MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
3800 MachineMemOperand::MODereferenceable);
3801 Chain = FuncTLVGet.getValue(1);
3802
3803 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
3804 MFI.setAdjustsStack(true);
3805
3806 // TLS calls preserve all registers except those that absolutely must be
3807 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3808 // silly).
3809 const uint32_t *Mask =
3810 Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
3811
3812 // Finally, we can make the call. This is just a degenerate version of a
3813 // normal AArch64 call node: x0 takes the address of the descriptor, and
3814 // returns the address of the variable in this thread.
3815 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
3816 Chain =
3817 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3818 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
3819 DAG.getRegisterMask(Mask), Chain.getValue(1));
3820 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
3821}
3822
3823/// When accessing thread-local variables under either the general-dynamic or
3824/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
3825/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
3826/// is a function pointer to carry out the resolution.
3827///
3828/// The sequence is:
3829/// adrp x0, :tlsdesc:var
3830/// ldr x1, [x0, #:tlsdesc_lo12:var]
3831/// add x0, x0, #:tlsdesc_lo12:var
3832/// .tlsdesccall var
3833/// blr x1
3834/// (TPIDR_EL0 offset now in x0)
3835///
3836/// The above sequence must be produced unscheduled, to enable the linker to
3837/// optimize/relax this sequence.
3838/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
3839/// above sequence, and expanded really late in the compilation flow, to ensure
3840/// the sequence is produced as per above.
3841SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
3842 const SDLoc &DL,
3843 SelectionDAG &DAG) const {
3844 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3845
3846 SDValue Chain = DAG.getEntryNode();
3847 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3848
3849 Chain =
3850 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
3851 SDValue Glue = Chain.getValue(1);
3852
3853 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
3854}
3855
3856SDValue
3857AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
3858 SelectionDAG &DAG) const {
3859 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
3860 assert(Subtarget->useSmallAddressing() &&
3861        "ELF TLS only supported in small memory model");
3862 // Different choices can be made for the maximum size of the TLS area for a
3863 // module. For the small address model, the default TLS size is 16MiB and the
3864 // maximum TLS size is 4GiB.
3865 // FIXME: add -mtls-size command line option and make it control the 16MiB
3866 // vs. 4GiB code sequence generation.
3867 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3868
3869 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
3870
3871 if (DAG.getTarget().Options.EmulatedTLS)
3872 return LowerToTLSEmulatedModel(GA, DAG);
3873
3874 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
3875 if (Model == TLSModel::LocalDynamic)
3876 Model = TLSModel::GeneralDynamic;
3877 }
3878
3879 SDValue TPOff;
3880 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3881 SDLoc DL(Op);
3882 const GlobalValue *GV = GA->getGlobal();
3883
3884 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
3885
3886 if (Model == TLSModel::LocalExec) {
3887 SDValue HiVar = DAG.getTargetGlobalAddress(
3888 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3889 SDValue LoVar = DAG.getTargetGlobalAddress(
3890 GV, DL, PtrVT, 0,
3891 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3892
3893 SDValue TPWithOff_lo =
3894 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
3895 HiVar,
3896 DAG.getTargetConstant(0, DL, MVT::i32)),
3897 0);
3898 SDValue TPWithOff =
3899 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
3900 LoVar,
3901 DAG.getTargetConstant(0, DL, MVT::i32)),
3902 0);
3903 return TPWithOff;
3904 } else if (Model == TLSModel::InitialExec) {
3905 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3906 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
3907 } else if (Model == TLSModel::LocalDynamic) {
3908 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
3909 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
3910 // the beginning of the module's TLS region, followed by a DTPREL offset
3911 // calculation.
3912
3913 // These accesses will need deduplicating if there's more than one.
3914 AArch64FunctionInfo *MFI =
3915 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
3916 MFI->incNumLocalDynamicTLSAccesses();
3917
3918 // The call needs a relocation too for linker relaxation. It doesn't make
3919 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3920 // the address.
3921 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
3922 AArch64II::MO_TLS);
3923
3924 // Now we can calculate the offset from TPIDR_EL0 to this module's
3925 // thread-local area.
3926 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3927
3928 // Now use :dtprel_whatever: operations to calculate this variable's offset
3929 // in its thread-storage area.
3930 SDValue HiVar = DAG.getTargetGlobalAddress(
3931 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3932 SDValue LoVar = DAG.getTargetGlobalAddress(
3933 GV, DL, MVT::i64, 0,
3934 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3935
3936 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
3937 DAG.getTargetConstant(0, DL, MVT::i32)),
3938 0);
3939 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
3940 DAG.getTargetConstant(0, DL, MVT::i32)),
3941 0);
3942 } else if (Model == TLSModel::GeneralDynamic) {
3943 // The call needs a relocation too for linker relaxation. It doesn't make
3944 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3945 // the address.
3946 SDValue SymAddr =
3947 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3948
3949 // Finally we can make a call to calculate the offset from tpidr_el0.
3950 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3951 } else
3952 llvm_unreachable("Unsupported ELF TLS access model");
3953
3954 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
3955}
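In the local-exec case above, the variable's offset from TPIDR_EL0 is folded into two 12-bit ADD immediates via the MO_HI12 and MO_PAGEOFF|MO_NC operand flags. A minimal sketch of the address arithmetic the two ADDXri nodes end up performing, using hypothetical placeholder values for what the :tprel_hi12:/:tprel_lo12_nc: relocations resolve to:

// Illustrative sketch only, not part of AArch64ISelLowering.cpp.
// TPRelHi12 and TPRelLo12 are hypothetical stand-ins for the linker-resolved
// relocation values; the offset is assumed to fit in 24 bits.
#include <cstdint>
static uint64_t localExecAddress(uint64_t ThreadPointer, // value of TPIDR_EL0
                                 uint64_t TPRelHi12,     // bits [23:12] of the offset
                                 uint64_t TPRelLo12) {   // bits [11:0] of the offset
  // Matches "add xN, tp, #hi12, lsl #12" followed by "add xN, xN, #lo12".
  return ThreadPointer + (TPRelHi12 << 12) + TPRelLo12;
}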
3956
3957SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
3958 SelectionDAG &DAG) const {
3959 if (Subtarget->isTargetDarwin())
3960 return LowerDarwinGlobalTLSAddress(Op, DAG);
3961 if (Subtarget->isTargetELF())
3962 return LowerELFGlobalTLSAddress(Op, DAG);
3963
3964 llvm_unreachable("Unexpected platform trying to use TLS");
3965}
3966
3967SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3968 SDValue Chain = Op.getOperand(0);
3969 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3970 SDValue LHS = Op.getOperand(2);
3971 SDValue RHS = Op.getOperand(3);
3972 SDValue Dest = Op.getOperand(4);
3973 SDLoc dl(Op);
3974
3975 // Handle f128 first, since lowering it will result in comparing the return
3976 // value of a libcall against zero, which is just what the rest of LowerBR_CC
3977 // is expecting to deal with.
3978 if (LHS.getValueType() == MVT::f128) {
3979 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
3980
3981 // If softenSetCCOperands returned a scalar, we need to compare the result
3982 // against zero to select between true and false values.
3983 if (!RHS.getNode()) {
3984 RHS = DAG.getConstant(0, dl, LHS.getValueType());
3985 CC = ISD::SETNE;
3986 }
3987 }
3988
3989 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
3990 // instruction.
3991 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS)) {
3992 assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
3993        "Unexpected condition code.");
3994 // Only lower legal XALUO ops.
3995 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
3996 return SDValue();
3997
3998 // The actual operation with overflow check.
3999 AArch64CC::CondCode OFCC;
4000 SDValue Value, Overflow;
4001 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4002
4003 if (CC == ISD::SETNE)
4004 OFCC = getInvertedCondCode(OFCC);
4005 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4006
4007 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4008 Overflow);
4009 }
4010
4011 if (LHS.getValueType().isInteger()) {
4012 assert((LHS.getValueType() == RHS.getValueType()) &&
4013        (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4014
4015 // If the RHS of the comparison is zero, we can potentially fold this
4016 // to a specialized branch.
4017 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4018 if (RHSC && RHSC->getZExtValue() == 0) {
4019 if (CC == ISD::SETEQ) {
4020 // See if we can use a TBZ to fold in an AND as well.
4021 // TBZ has a smaller branch displacement than CBZ. If the offset is
4022 // out of bounds, a late MI-layer pass rewrites branches.
4023 // 403.gcc is an example that hits this case.
4024 if (LHS.getOpcode() == ISD::AND &&
4025 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4026 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4027 SDValue Test = LHS.getOperand(0);
4028 uint64_t Mask = LHS.getConstantOperandVal(1);
4029 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4030 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4031 Dest);
4032 }
4033
4034 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4035 } else if (CC == ISD::SETNE) {
4036 // See if we can use a TBZ to fold in an AND as well.
4037 // TBZ has a smaller branch displacement than CBZ. If the offset is
4038 // out of bounds, a late MI-layer pass rewrites branches.
4039 // 403.gcc is an example that hits this case.
4040 if (LHS.getOpcode() == ISD::AND &&
4041 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4042 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4043 SDValue Test = LHS.getOperand(0);
4044 uint64_t Mask = LHS.getConstantOperandVal(1);
4045 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4046 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4047 Dest);
4048 }
4049
4050 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4051 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4052 // Don't combine AND since emitComparison converts the AND to an ANDS
4053 // (a.k.a. TST) and the test in the test bit and branch instruction
4054 // becomes redundant. This would also increase register pressure.
4055 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4056 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4057 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4058 }
4059 }
4060 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4061 LHS.getOpcode() != ISD::AND) {
4062 // Don't combine AND since emitComparison converts the AND to an ANDS
4063 // (a.k.a. TST) and the test in the test bit and branch instruction
4064 // becomes redundant. This would also increase register pressure.
4065 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4066 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4067 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4068 }
4069
4070 SDValue CCVal;
4071 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4072 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4073 Cmp);
4074 }
4075
4076 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4077        LHS.getValueType() == MVT::f64);
4078
4079 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4080 // clean. Some of them require two branches to implement.
4081 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4082 AArch64CC::CondCode CC1, CC2;
4083 changeFPCCToAArch64CC(CC, CC1, CC2);
4084 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4085 SDValue BR1 =
4086 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4087 if (CC2 != AArch64CC::AL) {
4088 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4089 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4090 Cmp);
4091 }
4092
4093 return BR1;
4094}
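The TBZ/TBNZ folds above rely on two simple identities: comparing (x & (1 << k)) against zero is the same as inspecting bit k, and a signed compare of x against zero is the same as inspecting the sign bit. A small stand-alone sketch of both identities (illustration only, not part of the original source):

#include <cstdint>
// Bit-test equivalences behind the TBZ/TBNZ folds; both helpers always
// return true.
static bool maskTestEqualsBitTest(uint64_t X, unsigned K) { // K < 64
  uint64_t Mask = 1ULL << K;           // the power-of-two AND mask in the IR
  bool MaskTest = (X & Mask) != 0;     // what the CBZ/CBNZ form would check
  bool BitTest = (X >> K) & 1;         // what TBNZ with bit index K checks
  return MaskTest == BitTest;
}
static bool signCompareEqualsSignBit(int64_t X) {
  bool Compare = X < 0;                                  // SETLT against zero
  bool SignBit = (static_cast<uint64_t>(X) >> 63) & 1;   // TBNZ on bit 63
  return Compare == SignBit;
}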
4095
4096SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4097 SelectionDAG &DAG) const {
4098 EVT VT = Op.getValueType();
4099 SDLoc DL(Op);
4100
4101 SDValue In1 = Op.getOperand(0);
4102 SDValue In2 = Op.getOperand(1);
4103 EVT SrcVT = In2.getValueType();
4104
4105 if (SrcVT.bitsLT(VT))
4106 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4107 else if (SrcVT.bitsGT(VT))
4108 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4109
4110 EVT VecVT;
4111 uint64_t EltMask;
4112 SDValue VecVal1, VecVal2;
4113
4114 auto setVecVal = [&] (int Idx) {
4115 if (!VT.isVector()) {
4116 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4117 DAG.getUNDEF(VecVT), In1);
4118 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4119 DAG.getUNDEF(VecVT), In2);
4120 } else {
4121 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4122 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4123 }
4124 };
4125
4126 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4127 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4128 EltMask = 0x80000000ULL;
4129 setVecVal(AArch64::ssub);
4130 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4131 VecVT = MVT::v2i64;
4132
4133 // We want to materialize a mask with the high bit set, but the AdvSIMD
4134 // immediate moves cannot materialize that in a single instruction for
4135 // 64-bit elements. Instead, materialize zero and then negate it.
4136 EltMask = 0;
4137
4138 setVecVal(AArch64::dsub);
4139 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4140 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4141 EltMask = 0x8000ULL;
4142 setVecVal(AArch64::hsub);
4143 } else {
4144 llvm_unreachable("Invalid type for copysign!");
4145 }
4146
4147 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4148
4149 // If we couldn't materialize the mask above, then the mask vector will be
4150 // the zero vector, and we need to negate it here.
4151 if (VT == MVT::f64 || VT == MVT::v2f64) {
4152 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4153 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4154 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4155 }
4156
4157 SDValue Sel =
4158 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4159
4160 if (VT == MVT::f16)
4161 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4162 if (VT == MVT::f32)
4163 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4164 else if (VT == MVT::f64)
4165 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4166 else
4167 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4168}
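For the scalar f64 case, the BIT node performs a plain bit-select: bits selected by the mask come from the sign operand, all other bits from the magnitude operand, and the mask built above has only the sign bit set. A minimal scalar sketch of that identity (illustration only, not part of the source):

#include <cstdint>
#include <cstring>
// copysign(Mag, Sign) expressed as the bit-select the BIT instruction does.
static double copySignViaBitSelect(double Mag, double Sign) {
  uint64_t M, S;
  std::memcpy(&M, &Mag, sizeof(M));
  std::memcpy(&S, &Sign, sizeof(S));
  const uint64_t SignMask = 1ULL << 63;           // the f64 "EltMask"
  uint64_t R = (M & ~SignMask) | (S & SignMask);  // insert sign bit from S
  double Res;
  std::memcpy(&Res, &R, sizeof(Res));
  return Res;
}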
4169
4170SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4171 if (DAG.getMachineFunction().getFunction()->hasFnAttribute(
4172 Attribute::NoImplicitFloat))
4173 return SDValue();
4174
4175 if (!Subtarget->hasNEON())
4176 return SDValue();
4177
4178 // While there is no integer popcount instruction, it can
4179 // be more efficiently lowered to the following sequence that uses
4180 // AdvSIMD registers/instructions as long as the copies to/from
4181 // the AdvSIMD registers are cheap.
4182 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4183 // CNT V0.8B, V0.8B // 8xbyte pop-counts
4184 // ADDV B0, V0.8B // sum 8xbyte pop-counts
4185 // UMOV X0, V0.B[0] // copy byte result back to integer reg
4186 SDValue Val = Op.getOperand(0);
4187 SDLoc DL(Op);
4188 EVT VT = Op.getValueType();
4189
4190 if (VT == MVT::i32)
4191 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4192 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4193
4194 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4195 SDValue UaddLV = DAG.getNode(
4196 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4197 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4198
4199 if (VT == MVT::i64)
4200 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4201 return UaddLV;
4202}
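The CNT/UADDLV sequence is correct because the population count of a 64-bit value equals the sum of the population counts of its eight bytes: the per-lane CNT computes the byte counts and the across-vector reduction sums them. A scalar sketch of that decomposition (illustration only; __builtin_popcount is a GCC/Clang builtin):

#include <cstdint>
// Byte-wise decomposition of a 64-bit popcount, mirroring CNT + UADDLV.
static unsigned popcountViaBytes(uint64_t X) {
  unsigned Sum = 0;
  for (int I = 0; I < 8; ++I) {
    uint8_t Byte = (X >> (8 * I)) & 0xff;  // one lane of the v8i8 CNT result
    Sum += __builtin_popcount(Byte);       // per-byte population count
  }
  return Sum;  // equals __builtin_popcountll(X)
}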
4203
4204SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4205
4206 if (Op.getValueType().isVector())
4207 return LowerVSETCC(Op, DAG);
4208
4209 SDValue LHS = Op.getOperand(0);
4210 SDValue RHS = Op.getOperand(1);
4211 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4212 SDLoc dl(Op);
4213
4214 // We chose ZeroOrOneBooleanContents, so use zero and one.
4215 EVT VT = Op.getValueType();
4216 SDValue TVal = DAG.getConstant(1, dl, VT);
4217 SDValue FVal = DAG.getConstant(0, dl, VT);
4218
4219 // Handle f128 first, since one possible outcome is a normal integer
4220 // comparison which gets picked up by the next if statement.
4221 if (LHS.getValueType() == MVT::f128) {
4222 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4223
4224 // If softenSetCCOperands returned a scalar, use it.
4225 if (!RHS.getNode()) {
4226 assert(LHS.getValueType() == Op.getValueType() &&
4227        "Unexpected setcc expansion!");
4228 return LHS;
4229 }
4230 }
4231
4232 if (LHS.getValueType().isInteger()) {
4233 SDValue CCVal;
4234 SDValue Cmp =
4235 getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4236
4237 // Note that we inverted the condition above, so we reverse the order of
4238 // the true and false operands here. This will allow the setcc to be
4239 // matched to a single CSINC instruction.
4240 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4241 }
4242
4243 // Now we know we're dealing with FP values.
4244 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4245        LHS.getValueType() == MVT::f64);
4246
4247 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4248 // and do the comparison.
4249 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4250
4251 AArch64CC::CondCode CC1, CC2;
4252 changeFPCCToAArch64CC(CC, CC1, CC2);
4253 if (CC2 == AArch64CC::AL) {
4254 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4255 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4256
4257 // Note that we inverted the condition above, so we reverse the order of
4258 // the true and false operands here. This will allow the setcc to be
4259 // matched to a single CSINC instruction.
4260 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4261 } else {
4262 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4263 // totally clean. Some of them require two CSELs to implement. As is in
4264 // this case, we emit the first CSEL and then emit a second using the output
4265 // of the first as the RHS. We're effectively OR'ing the two CC's together.
4266
4267 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4268 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4269 SDValue CS1 =
4270 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4271
4272 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4273 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4274 }
4275}
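In the integer path above, the condition is inverted and the true/false operands are swapped so that the resulting select of (0, 1) matches the semantics of a single CSINC against the zero register. A tiny sketch of why the swapped form still computes the original setcc (illustration only):

// CSEL picks its first value operand when the condition holds. With the
// inverted condition, FVal (0) sits in that slot and TVal (1) in the other,
// which is exactly "CSINC Wd, WZR, WZR, inverted(cc)": WZR if the inverted
// condition holds, WZR + 1 otherwise.
static int setccAsCSINC(bool CC) {
  bool Inverted = !CC;              // condition handed to the CSEL node
  int Result = Inverted ? 0 : 1;    // FVal-first operand order
  return Result;                    // == (CC ? 1 : 0), the setcc value
}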
4276
4277SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4278 SDValue RHS, SDValue TVal,
4279 SDValue FVal, const SDLoc &dl,
4280 SelectionDAG &DAG) const {
4281 // Handle f128 first, because it will result in a comparison of some RTLIB
4282 // call result against zero.
4283 if (LHS.getValueType() == MVT::f128) {
4284 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4285
4286 // If softenSetCCOperands returned a scalar, we need to compare the result
4287 // against zero to select between true and false values.
4288 if (!RHS.getNode()) {
4289 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4290 CC = ISD::SETNE;
4291 }
4292 }
4293
4294 // Also handle f16, for which we need to do a f32 comparison.
4295 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4296 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4297 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4298 }
4299
4300 // Next, handle integers.
4301 if (LHS.getValueType().isInteger()) {
4302 assert((LHS.getValueType() == RHS.getValueType()) &&
4303        (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4304
4305 unsigned Opcode = AArch64ISD::CSEL;
4306
4307 // If both the TVal and the FVal are constants, see if we can swap them
4308 // in order to form a CSINV or CSINC out of them.
4309 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4310 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4311
4312 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4313 std::swap(TVal, FVal);
4314 std::swap(CTVal, CFVal);
4315 CC = ISD::getSetCCInverse(CC, true);
4316 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4317 std::swap(TVal, FVal);
4318 std::swap(CTVal, CFVal);
4319 CC = ISD::getSetCCInverse(CC, true);
4320 } else if (TVal.getOpcode() == ISD::XOR) {
4321 // If TVal is a NOT we want to swap TVal and FVal so that we can match
4322 // with a CSINV rather than a CSEL.
4323 if (isAllOnesConstant(TVal.getOperand(1))) {
4324 std::swap(TVal, FVal);
4325 std::swap(CTVal, CFVal);
4326 CC = ISD::getSetCCInverse(CC, true);
4327 }
4328 } else if (TVal.getOpcode() == ISD::SUB) {
4329 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4330 // that we can match with a CSNEG rather than a CSEL.
4331 if (isNullConstant(TVal.getOperand(0))) {
4332 std::swap(TVal, FVal);
4333 std::swap(CTVal, CFVal);
4334 CC = ISD::getSetCCInverse(CC, true);
4335 }
4336 } else if (CTVal && CFVal) {
4337 const int64_t TrueVal = CTVal->getSExtValue();
4338 const int64_t FalseVal = CFVal->getSExtValue();
4339 bool Swap = false;
4340
4341 // If both TVal and FVal are constants, see if FVal is the
4342 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4343 // instead of a CSEL in that case.
4344 if (TrueVal == ~FalseVal) {
4345 Opcode = AArch64ISD::CSINV;
4346 } else if (TrueVal == -FalseVal) {
4347 Opcode = AArch64ISD::CSNEG;
4348 } else if (TVal.getValueType() == MVT::i32) {
4349 // If our operands are only 32-bit wide, make sure we use 32-bit
4350 // arithmetic for the check whether we can use CSINC. This ensures that
4351 // the addition in the check will wrap around properly in case there is
4352 // an overflow (which would not be the case if we do the check with
4353 // 64-bit arithmetic).
4354 const uint32_t TrueVal32 = CTVal->getZExtValue();
4355 const uint32_t FalseVal32 = CFVal->getZExtValue();
4356
4357 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4358 Opcode = AArch64ISD::CSINC;
4359
4360 if (TrueVal32 > FalseVal32) {
4361 Swap = true;
4362 }
4363 }
4364 // 64-bit check whether we can use CSINC.
4365 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4366 Opcode = AArch64ISD::CSINC;
4367
4368 if (TrueVal > FalseVal) {
4369 Swap = true;
4370 }
4371 }
4372
4373 // Swap TVal and FVal if necessary.
4374 if (Swap) {
4375 std::swap(TVal, FVal);
4376 std::swap(CTVal, CFVal);
4377 CC = ISD::getSetCCInverse(CC, true);
4378 }
4379
4380 if (Opcode != AArch64ISD::CSEL) {
4381 // Drop FVal since we can get its value by simply inverting/negating
4382 // TVal.
4383 FVal = TVal;
4384 }
4385 }
4386
4387 // Avoid materializing a constant when possible by reusing a known value in
4388 // a register. However, don't perform this optimization if the known value
4389 // is one, zero or negative one in the case of a CSEL. We can always
4390 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4391 // FVal, respectively.
4392 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4393 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4394 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4395 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4396 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4397 // "a != C ? x : a" to avoid materializing C.
4398 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4399 TVal = LHS;
4400 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4401 FVal = LHS;
4402 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4403 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
4404 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4405 // avoid materializing C.
4406 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4407 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4408 Opcode = AArch64ISD::CSINV;
4409 TVal = LHS;
4410 FVal = DAG.getConstant(0, dl, FVal.getValueType());
4411 }
4412 }
4413
4414 SDValue CCVal;
4415 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4416 EVT VT = TVal.getValueType();
4417 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4418 }
4419
4420 // Now we know we're dealing with FP values.
4421 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4422        LHS.getValueType() == MVT::f64);
4423 assert(LHS.getValueType() == RHS.getValueType());
4424 EVT VT = TVal.getValueType();
4425 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4426
4427 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4428 // clean. Some of them require two CSELs to implement.
4429 AArch64CC::CondCode CC1, CC2;
4430 changeFPCCToAArch64CC(CC, CC1, CC2);
4431
4432 if (DAG.getTarget().Options.UnsafeFPMath) {
4433 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4434 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4435 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4436 if (RHSVal && RHSVal->isZero()) {
4437 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4438 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4439
4440 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4441 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4442 TVal = LHS;
4443 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4444 CFVal && CFVal->isZero() &&
4445 FVal.getValueType() == LHS.getValueType())
4446 FVal = LHS;
4447 }
4448 }
4449
4450 // Emit first, and possibly only, CSEL.
4451 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4452 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4453
4454 // If we need a second CSEL, emit it, using the output of the first as the
4455 // RHS. We're effectively OR'ing the two CC's together.
4456 if (CC2 != AArch64CC::AL) {
4457 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4458 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4459 }
4460
4461 // Otherwise, return the output of the first CSEL.
4462 return CS1;
4463}
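The CSINC check in the constant/constant case above is deliberately done in 32-bit arithmetic for i32 operands: with 32-bit wrap-around, INT32_MAX + 1 equals INT32_MIN, so the pair (INT32_MIN, INT32_MAX) is still a valid CSINC candidate, while the same check on sign-extended 64-bit values would reject it. A small sketch of that corner case (illustration only):

#include <cstdint>
// Returns true when TVal/FVal differ by one modulo 2^32, i.e. when a CSINC
// of the smaller value can produce the larger one in a W register.
static bool isCSINCPair32(int32_t TVal, int32_t FVal) {
  const uint32_t T = static_cast<uint32_t>(TVal);
  const uint32_t F = static_cast<uint32_t>(FVal);
  return T == F + 1 || T + 1 == F;  // wraps modulo 2^32, as the CPU does
}
// isCSINCPair32(INT32_MIN, INT32_MAX) is true; comparing the sign-extended
// 64-bit values (2147483647 + 1 vs. -2147483648) would say false.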
4464
4465SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4466 SelectionDAG &DAG) const {
4467 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4468 SDValue LHS = Op.getOperand(0);
4469 SDValue RHS = Op.getOperand(1);
4470 SDValue TVal = Op.getOperand(2);
4471 SDValue FVal = Op.getOperand(3);
4472 SDLoc DL(Op);
4473 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4474}
4475
4476SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4477 SelectionDAG &DAG) const {
4478 SDValue CCVal = Op->getOperand(0);
4479 SDValue TVal = Op->getOperand(1);
4480 SDValue FVal = Op->getOperand(2);
4481 SDLoc DL(Op);
4482
4483 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4484 // instruction.
4485 if (isOverflowIntrOpRes(CCVal)) {
4486 // Only lower legal XALUO ops.
4487 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4488 return SDValue();
4489
4490 AArch64CC::CondCode OFCC;
4491 SDValue Value, Overflow;
4492 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4493 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4494
4495 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4496 CCVal, Overflow);
4497 }
4498
4499 // Lower it the same way as we would lower a SELECT_CC node.
4500 ISD::CondCode CC;
4501 SDValue LHS, RHS;
4502 if (CCVal.getOpcode() == ISD::SETCC) {
4503 LHS = CCVal.getOperand(0);
4504 RHS = CCVal.getOperand(1);
4505 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4506 } else {
4507 LHS = CCVal;
4508 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4509 CC = ISD::SETNE;
4510 }
4511 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4512}
4513
4514SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4515 SelectionDAG &DAG) const {
4516 // Jump table entries are emitted as PC-relative offsets. No additional
4517 // tweaking is necessary here; just get the address of the jump table.
4518 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4519
4520 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4521 !Subtarget->isTargetMachO()) {
4522 return getAddrLarge(JT, DAG);
4523 }
4524 return getAddr(JT, DAG);
4525}
4526
4527SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4528 SelectionDAG &DAG) const {
4529 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4530
4531 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4532 // Use the GOT for the large code model on iOS.
4533 if (Subtarget->isTargetMachO()) {
4534 return getGOT(CP, DAG);
4535 }
4536 return getAddrLarge(CP, DAG);
4537 } else {
4538 return getAddr(CP, DAG);
4539 }
4540}
4541
4542SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4543 SelectionDAG &DAG) const {
4544 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
4545 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4546 !Subtarget->isTargetMachO()) {
4547 return getAddrLarge(BA, DAG);
4548 } else {
4549 return getAddr(BA, DAG);
4550 }
4551}
4552
4553SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4554 SelectionDAG &DAG) const {
4555 AArch64FunctionInfo *FuncInfo =
4556 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4557
4558 SDLoc DL(Op);
4559 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4560 getPointerTy(DAG.getDataLayout()));
4561 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4562 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4563 MachinePointerInfo(SV));
4564}
4565
4566SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
4567 SelectionDAG &DAG) const {
4568 AArch64FunctionInfo *FuncInfo =
4569 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4570
4571 SDLoc DL(Op);
4572 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
4573 ? FuncInfo->getVarArgsGPRIndex()
4574 : FuncInfo->getVarArgsStackIndex(),
4575 getPointerTy(DAG.getDataLayout()));
4576 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4577 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4578 MachinePointerInfo(SV));
4579}
4580
4581SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
4582 SelectionDAG &DAG) const {
4583 // The layout of the va_list struct is specified in the AArch64 Procedure Call
4584 // Standard, section B.3.
4585 MachineFunction &MF = DAG.getMachineFunction();
4586 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4587 auto PtrVT = getPointerTy(DAG.getDataLayout());
4588 SDLoc DL(Op);
4589
4590 SDValue Chain = Op.getOperand(0);
4591 SDValue VAList = Op.getOperand(1);
4592 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4593 SmallVector<SDValue, 4> MemOps;
4594
4595 // void *__stack at offset 0
4596 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
4597 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
4598 MachinePointerInfo(SV), /* Alignment = */ 8));
4599
4600 // void *__gr_top at offset 8
4601 int GPRSize = FuncInfo->getVarArgsGPRSize();
4602 if (GPRSize > 0) {
4603 SDValue GRTop, GRTopAddr;
4604
4605 GRTopAddr =
4606 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
4607
4608 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
4609 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
4610 DAG.getConstant(GPRSize, DL, PtrVT));
4611
4612 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
4613 MachinePointerInfo(SV, 8),
4614 /* Alignment = */ 8));
4615 }
4616
4617 // void *__vr_top at offset 16
4618 int FPRSize = FuncInfo->getVarArgsFPRSize();
4619 if (FPRSize > 0) {
4620 SDValue VRTop, VRTopAddr;
4621 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4622 DAG.getConstant(16, DL, PtrVT));
4623
4624 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
4625 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
4626 DAG.getConstant(FPRSize, DL, PtrVT));
4627
4628 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
4629 MachinePointerInfo(SV, 16),
4630 /* Alignment = */ 8));
4631 }
4632
4633 // int __gr_offs at offset 24
4634 SDValue GROffsAddr =
4635 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
4636 MemOps.push_back(DAG.getStore(
4637 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
4638 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
4639
4640 // int __vr_offs at offset 28
4641 SDValue VROffsAddr =
4642 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
4643 MemOps.push_back(DAG.getStore(
4644 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
4645 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
4646
4647 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4648}
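The five stores above populate the AAPCS64 va_list structure at the offsets used in the code (0, 8, 16, 24 and 28). A sketch of the equivalent layout (illustration only; the authoritative definition is in the AArch64 Procedure Call Standard, not in this file):

#include <cstdint>
// AAPCS64 va_list layout, assuming 64-bit pointers.
struct AAPCS64VaList {
  void *__stack;      // offset 0:  next stacked argument
  void *__gr_top;     // offset 8:  end of the saved general-purpose register area
  void *__vr_top;     // offset 16: end of the saved FP/SIMD register area
  int32_t __gr_offs;  // offset 24: negative offset from __gr_top (stored as -GPRSize)
  int32_t __vr_offs;  // offset 28: negative offset from __vr_top (stored as -FPRSize)
};
static_assert(sizeof(AAPCS64VaList) == 32, "matches the 32-byte VaListSize");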
4649
4650SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
4651 SelectionDAG &DAG) const {
4652 MachineFunction &MF = DAG.getMachineFunction();
4653
4654 if (Subtarget->isCallingConvWin64(MF.getFunction()->getCallingConv()))
4655 return LowerWin64_VASTART(Op, DAG);
4656 else if (Subtarget->isTargetDarwin())
4657 return LowerDarwin_VASTART(Op, DAG);
4658 else
4659 return LowerAAPCS_VASTART(Op, DAG);
4660}
4661
4662SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
4663 SelectionDAG &DAG) const {
4664 // AAPCS has three pointers and two ints (= 32 bytes); Darwin and Windows
4665 // use a single pointer.
4666 SDLoc DL(Op);
4667 unsigned VaListSize =
4668 Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
4669 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4670 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4671
4672 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
4673 Op.getOperand(2),
4674 DAG.getConstant(VaListSize, DL, MVT::i32),
4675 8, false, false, false, MachinePointerInfo(DestSV),
4676 MachinePointerInfo(SrcSV));
4677}
4678
4679SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
4680 assert(Subtarget->isTargetDarwin() &&
4681        "automatic va_arg instruction only works on Darwin");
4682
4683 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4684 EVT VT = Op.getValueType();
4685 SDLoc DL(Op);
4686 SDValue Chain = Op.getOperand(0);
4687 SDValue Addr = Op.getOperand(1);
4688 unsigned Align = Op.getConstantOperandVal(3);
4689 auto PtrVT = getPointerTy(DAG.getDataLayout());
4690
4691 SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
4692 Chain = VAList.getValue(1);
4693
4694 if (Align > 8) {
4695 assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
4696 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4697 DAG.getConstant(Align - 1, DL, PtrVT));
4698 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
4699 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
4700 }
4701
4702 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
4703 uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
4704
4705 // Scalar integer and FP values smaller than 64 bits are implicitly extended
4706 // up to 64 bits. At the very least, we have to increase the striding of the
4707 // vaargs list to match this, and for FP values we need to introduce
4708 // FP_ROUND nodes as well.
4709 if (VT.isInteger() && !VT.isVector())
4710 ArgSize = 8;
4711 bool NeedFPTrunc = false;
4712 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
4713 ArgSize = 8;
4714 NeedFPTrunc = true;
4715 }
4716
4717 // Increment the pointer, VAList, to the next vaarg
4718 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4719 DAG.getConstant(ArgSize, DL, PtrVT));
4720 // Store the incremented VAList to the legalized pointer
4721 SDValue APStore =
4722 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
4723
4724 // Load the actual argument out of the pointer VAList
4725 if (NeedFPTrunc) {
4726 // Load the value as an f64.
4727 SDValue WideFP =
4728 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
4729 // Round the value down to an f32.
4730 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
4731 DAG.getIntPtrConstant(1, DL));
4732 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
4733 // Merge the rounded value with the chain output of the load.
4734 return DAG.getMergeValues(Ops, DL);
4735 }
4736
4737 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
4738}
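When the requested alignment exceeds 8, the ADD/AND pair above rounds the list pointer up to the next multiple of Align. A one-line sketch of that idiom (illustration only; Align must be a power of two, which the assert enforces):

#include <cstdint>
// Round Addr up to the next multiple of Align (Align is a power of two).
static uint64_t alignUp(uint64_t Addr, uint64_t Align) {
  return (Addr + Align - 1) & ~(Align - 1);  // same mask as -(int64_t)Align
}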
4739
4740SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
4741 SelectionDAG &DAG) const {
4742 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4743 MFI.setFrameAddressIsTaken(true);
4744
4745 EVT VT = Op.getValueType();
4746 SDLoc DL(Op);
4747 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4748 SDValue FrameAddr =
4749 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
4750 while (Depth--)
4751 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
4752 MachinePointerInfo());
4753 return FrameAddr;
4754}
4755
4756// FIXME? Maybe this could be a TableGen attribute on some registers and
4757// this table could be generated automatically from RegInfo.
4758unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
4759 SelectionDAG &DAG) const {
4760 unsigned Reg = StringSwitch<unsigned>(RegName)
4761 .Case("sp", AArch64::SP)
4762 .Case("x18", AArch64::X18)
4763 .Case("w18", AArch64::W18)
4764 .Default(0);
4765 if ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
4766 !Subtarget->isX18Reserved())
4767 Reg = 0;
4768 if (Reg)
4769 return Reg;
4770 report_fatal_error(Twine("Invalid register name \""
4771 + StringRef(RegName) + "\"."));
4772}
4773
4774SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
4775 SelectionDAG &DAG) const {
4776 MachineFunction &MF = DAG.getMachineFunction();
4777 MachineFrameInfo &MFI = MF.getFrameInfo();
4778 MFI.setReturnAddressIsTaken(true);
4779
4780 EVT VT = Op.getValueType();
4781 SDLoc DL(Op);
4782 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4783 if (Depth) {
4784 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4785 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
4786 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
4787 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
4788 MachinePointerInfo());
4789 }
4790
4791 // Return LR, which contains the return address. Mark it an implicit live-in.
4792 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
4793 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
4794}
4795
4796 /// LowerShiftRightParts - Lower SRA_PARTS/SRL_PARTS, which return two
4797 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
4798SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
4799 SelectionDAG &DAG) const {
4800 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4801 EVT VT = Op.getValueType();
4802 unsigned VTBits = VT.getSizeInBits();
4803 SDLoc dl(Op);
4804 SDValue ShOpLo = Op.getOperand(0);
4805 SDValue ShOpHi = Op.getOperand(1);
4806 SDValue ShAmt = Op.getOperand(2);
4807 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4808
4809 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4810
4811 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4812 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4813 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4814
4815 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
4816 // is "undef". We wanted 0, so CSEL it directly.
4817 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4818 ISD::SETEQ, dl, DAG);
4819 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4820 HiBitsForLo =
4821 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4822 HiBitsForLo, CCVal, Cmp);
4823
4824 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4825 DAG.getConstant(VTBits, dl, MVT::i64));
4826
4827 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4828 SDValue LoForNormalShift =
4829 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
4830
4831 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4832 dl, DAG);
4833 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4834 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4835 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4836 LoForNormalShift, CCVal, Cmp);
4837
4838 // AArch64 shifts larger than the register width are wrapped rather than
4839 // clamped, so we can't just emit "hi >> x".
4840 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4841 SDValue HiForBigShift =
4842 Opc == ISD::SRA
4843 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4844 DAG.getConstant(VTBits - 1, dl, MVT::i64))
4845 : DAG.getConstant(0, dl, VT);
4846 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4847 HiForNormalShift, CCVal, Cmp);
4848
4849 SDValue Ops[2] = { Lo, Hi };
4850 return DAG.getMergeValues(Ops, dl);
4851}
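The CSELs above implement, in branch-free form, the usual three-case decomposition of a 128-bit shift right split across two 64-bit registers. A scalar sketch of the logical-shift variant (illustration only; shift amounts are assumed to be below 128, and the SRA_PARTS case would fill the high half with copies of the sign bit instead of zero):

#include <cstdint>
// 128-bit logical shift right of Hi:Lo by Amt (0 <= Amt < 128).
static void lshr128(uint64_t Lo, uint64_t Hi, unsigned Amt,
                    uint64_t &OutLo, uint64_t &OutHi) {
  if (Amt == 0) {                   // the "ShAmt == 0" CSEL: avoid Hi << 64
    OutLo = Lo;
    OutHi = Hi;
  } else if (Amt < 64) {            // normal-shift path
    OutLo = (Lo >> Amt) | (Hi << (64 - Amt));
    OutHi = Hi >> Amt;
  } else {                          // big-shift path (ExtraShAmt >= 0)
    OutLo = Hi >> (Amt - 64);
    OutHi = 0;
  }
}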
4852
4853 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4854 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
4855SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
4856 SelectionDAG &DAG) const {
4857 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4858 EVT VT = Op.getValueType();
4859 unsigned VTBits = VT.getSizeInBits();
4860 SDLoc dl(Op);
4861 SDValue ShOpLo = Op.getOperand(0);
4862 SDValue ShOpHi = Op.getOperand(1);
4863 SDValue ShAmt = Op.getOperand(2);
4864
4865 assert(Op.getOpcode() == ISD::SHL_PARTS);
4866 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4867 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4868 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4869
4870 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
4871 // is "undef". We wanted 0, so CSEL it directly.
4872 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4873 ISD::SETEQ, dl, DAG);
4874 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4875 LoBitsForHi =
4876 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4877 LoBitsForHi, CCVal, Cmp);
4878
4879 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4880 DAG.getConstant(VTBits, dl, MVT::i64));
4881 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4882 SDValue HiForNormalShift =
4883 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
4884
4885 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4886
4887 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4888 dl, DAG);
4889 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4890 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4891 HiForNormalShift, CCVal, Cmp);
4892
4893 // AArch64 shifts larger than the register width are wrapped rather than
4894 // clamped, so we can't just emit "lo << a" if a is too big.
4895 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
4896 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4897 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4898 LoForNormalShift, CCVal, Cmp);
4899
4900 SDValue Ops[2] = { Lo, Hi };
4901 return DAG.getMergeValues(Ops, dl);
4902}
4903
4904bool AArch64TargetLowering::isOffsetFoldingLegal(
4905 const GlobalAddressSDNode *GA) const {
4906 DEBUG(dbgs() << "Skipping offset folding global address: ");
4907 DEBUG(GA->dump());
4908 DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
4909              "addresses\n");
4910 return false;
4911}
4912
4913bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4914 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
4915 // FIXME: We should be able to handle f128 as well with a clever lowering.
4916 if (Imm.isPosZero() && (VT == MVT::f16 || VT == MVT::f64 || VT == MVT::f32)) {
4917 DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
4918 return true;
4919 }
4920
4921 StringRef FPType;
4922 bool IsLegal = false;
4923 SmallString<128> ImmStrVal;
4924 Imm.toString(ImmStrVal);
4925
4926 if (VT == MVT::f64) {
4927 FPType = "f64";
4928 IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
4929 } else if (VT == MVT::f32) {
4930 FPType = "f32";
4931 IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
4932 } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
4933 FPType = "f16";
4934 IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
4935 }
4936
4937 if (IsLegal) {
4938 DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n");
4939 return true;
4940 }
4941
4942 if (!FPType.empty())
4943    DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n");
4944  else
4945    DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n");
4946
4947 return false;
4948}
4949
4950//===----------------------------------------------------------------------===//
4951// AArch64 Optimization Hooks
4952//===----------------------------------------------------------------------===//
4953
4954static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
4955 SDValue Operand, SelectionDAG &DAG,
4956 int &ExtraSteps) {
4957 EVT VT = Operand.getValueType();
4958 if (ST->hasNEON() &&
4959 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
4960 VT == MVT::f32 || VT == MVT::v1f32 ||
4961 VT == MVT::v2f32 || VT == MVT::v4f32)) {
4962 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
4963 // For the reciprocal estimates, convergence is quadratic, so the number
4964 // of digits is doubled after each iteration. In ARMv8, the accuracy of
4965 // the initial estimate is 2^-8. Thus the number of extra steps to refine
4966 // the result for float (23 mantissa bits) is 2 and for double (52
4967 // mantissa bits) is 3.
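      // Annotation, not in the original source: concretely, squaring the error
      // each step gives roughly 2^-16, 2^-32, 2^-64 after 1, 2 and 3
      // refinements, so 2 steps cover the 23 mantissa bits of float and 3
      // steps cover the 52 mantissa bits of double.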
4968 ExtraSteps = VT == MVT::f64 ? 3 : 2;
4969
4970 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
4971 }
4972
4973 return SDValue();
4974}
4975
4976SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
4977 SelectionDAG &DAG, int Enabled,
4978 int &ExtraSteps,
4979 bool &UseOneConst,
4980 bool Reciprocal) const {
4981 if (Enabled == ReciprocalEstimate::Enabled ||
4982 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
4983 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
4984 DAG, ExtraSteps)) {
4985 SDLoc DL(Operand);
4986 EVT VT = Operand.getValueType();
4987
4988 SDNodeFlags Flags;
4989 Flags.setUnsafeAlgebra(true);
4990
4991 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
4992 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
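      // Annotation, not in the original source: FRSQRTS(M, N) computes
      // 0.5 * (3 - M * N), so with Step = E * E the loop below forms
      // E' = E * 0.5 * (3 - X * E^2), i.e. one Newton-Raphson refinement of
      // 1/sqrt(X) per iteration.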
4993 for (int i = ExtraSteps; i > 0; --i) {
4994 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
4995 Flags);
4996 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
4997 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
4998 }
4999
5000 if (!Reciprocal) {
5001 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5002 VT);
5003 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5004 SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5005
5006 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5007 // Correct the result if the operand is 0.0.
5008 Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5009 VT, Eq, Operand, Estimate);
5010 }
5011
5012 ExtraSteps = 0;
5013 return Estimate;
5014 }
5015
5016 return SDValue();
5017}
5018
5019SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5020 SelectionDAG &DAG, int Enabled,
5021 int &ExtraSteps) const {
5022 if (Enabled == ReciprocalEstimate::Enabled)
5023 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5024 DAG, ExtraSteps)) {
5025 SDLoc DL(Operand);
5026 EVT VT = Operand.getValueType();
5027
5028 SDNodeFlags Flags;
5029 Flags.setUnsafeAlgebra(true);
5030
5031 // Newton reciprocal iteration: E * (2 - X * E)
5032 // AArch64 reciprocal iteration instruction: (2 - M * N)
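      // Annotation, not in the original source: FRECPS(M, N) computes
      // (2 - M * N), so each loop iteration below forms E' = E * (2 - X * E),
      // one Newton-Raphson refinement of 1/X.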
5033 for (int i = ExtraSteps; i > 0; --i) {
5034 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5035 Estimate, Flags);
5036 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5037 }
5038
5039 ExtraSteps = 0;
5040 return Estimate;
5041 }
5042
5043 return SDValue();
5044}
5045
5046//===----------------------------------------------------------------------===//
5047// AArch64 Inline Assembly Support
5048//===----------------------------------------------------------------------===//
5049
5050// Table of Constraints
5051// TODO: This is the current set of constraints supported by ARM for the
5052// compiler; not all of them may make sense, e.g. S may be difficult to support.
5053//
5054// r - A general register
5055// w - An FP/SIMD register of some size in the range v0-v31
5056// x - An FP/SIMD register of some size in the range v0-v15
5057// I - Constant that can be used with an ADD instruction
5058// J - Constant that can be used with a SUB instruction
5059// K - Constant that can be used with a 32-bit logical instruction
5060// L - Constant that can be used with a 64-bit logical instruction
5061// M - Constant that can be used as a 32-bit MOV immediate
5062// N - Constant that can be used as a 64-bit MOV immediate
5063// Q - A memory reference with base register and no offset
5064// S - A symbolic address
5065// Y - Floating point constant zero
5066// Z - Integer constant zero
5067//
5068// Note that general register operands will be output using their 64-bit x
5069// register name, whatever the size of the variable, unless the asm operand
5070// is prefixed by the %w modifier. Floating-point and SIMD register operands
5071// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5072// %q modifier.
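//
// Annotation, not in the original source: a hypothetical (illustrative only)
// use of these constraints from C, assuming FP support is available:
//
//   float fadd32(float a, float b) {
//     float r;
//     asm("fadd %s0, %s1, %s2" : "=w"(r) : "w"(a), "w"(b));
//     return r;
//   }
//
// "w" places the operands in FP/SIMD registers and the %s modifier prints them
// with the s prefix, per the note above.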
5073const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5074 // At this point, we have to lower this constraint to something else, so we
5075 // lower it to an "r" or "w". However, by doing this we will force the result
5076 // to be in register, while the X constraint is much more permissive.
5077 //
5078 // Although we are correct (we are free to emit anything, without
5079 // constraints), we might break use cases that would expect us to be more
5080 // efficient and emit something else.
5081 if (!Subtarget->hasFPARMv8())
5082 return "r";
5083
5084 if (ConstraintVT.isFloatingPoint())
5085 return "w";
5086
5087 if (ConstraintVT.isVector() &&
5088 (ConstraintVT.getSizeInBits() == 64 ||
5089 ConstraintVT.getSizeInBits() == 128))
5090 return "w";
5091
5092 return "r";
5093}
5094
5095/// getConstraintType - Given a constraint letter, return the type of
5096/// constraint it is for this target.
5097AArch64TargetLowering::ConstraintType
5098AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5099 if (Constraint.size() == 1) {
5100 switch (Constraint[0]) {
5101 default:
5102 break;
5103 case 'z':
5104 return C_Other;
5105 case 'x':
5106 case 'w':
5107 return C_RegisterClass;
5108 // An address with a single base register. Due to the way we
5109 // currently handle addresses it is the same as 'r'.
5110 case 'Q':
5111 return C_Memory;
5112 }
5113 }
5114 return TargetLowering::getConstraintType(Constraint);
5115}
5116
5117/// Examine constraint type and operand type and determine a weight value.
5118/// This object must already have been set up with the operand type
5119/// and the current alternative constraint selected.
5120TargetLowering::ConstraintWeight
5121AArch64TargetLowering::getSingleConstraintMatchWeight(
5122 AsmOperandInfo &info, const char *constraint) const {
5123 ConstraintWeight weight = CW_Invalid;
5124 Value *CallOperandVal = info.CallOperandVal;
5125 // If we don't have a value, we can't do a match,
5126 // but allow it at the lowest weight.
5127 if (!CallOperandVal)
5128 return CW_Default;
5129 Type *type = CallOperandVal->getType();
5130 // Look at the constraint type.
5131 switch (*constraint) {
5132 default:
5133 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5134 break;
5135 case 'x':
5136 case 'w':
5137 if (type->isFloatingPointTy() || type->isVectorTy())
5138 weight = CW_Register;
5139 break;
5140 case 'z':
5141 weight = CW_Constant;
5142 break;
5143 }
5144 return weight;
5145}
5146
5147std::pair<unsigned, const TargetRegisterClass *>
5148AArch64TargetLowering::getRegForInlineAsmConstraint(
5149 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5150 if (Constraint.size() == 1) {
5151 switch (Constraint[0]) {
5152 case 'r':
5153 if (VT.getSizeInBits() == 64)
5154 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5155 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5156 case 'w':
5157 if (VT.getSizeInBits() == 16)
5158 return std::make_pair(0U, &AArch64::FPR16RegClass);
5159 if (VT.getSizeInBits() == 32)
5160 return std::make_pair(0U, &AArch64::FPR32RegClass);
5161 if (VT.getSizeInBits() == 64)
5162 return std::make_pair(0U, &AArch64::FPR64RegClass);
5163 if (VT.getSizeInBits() == 128)
5164 return std::make_pair(0U, &AArch64::FPR128RegClass);
5165 break;
5166 // The instructions that this constraint is designed for can
5167 // only take 128-bit registers so just use that regclass.
5168 case 'x':
5169 if (VT.getSizeInBits() == 128)
5170 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5171 break;
5172 }
5173 }
5174 if (StringRef("{cc}").equals_lower(Constraint))
5175 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5176
5177 // Use the default implementation in TargetLowering to convert the register
5178 // constraint into a member of a register class.
5179 std::pair<unsigned, const TargetRegisterClass *> Res;
5180 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5181
5182 // Not found as a standard register?
5183 if (!Res.second) {
5184 unsigned Size = Constraint.size();
5185 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5186 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5187 int RegNo;
5188 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5189 if (!Failed && RegNo >= 0 && RegNo <= 31) {
5190 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5191 // By default we'll emit v0-v31 for this unless there's a modifier where
5192 // we'll emit the correct register as well.
5193 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5194 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5195 Res.second = &AArch64::FPR64RegClass;
5196 } else {
5197 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5198 Res.second = &AArch64::FPR128RegClass;
5199 }
5200 }
5201 }
5202 }
5203
5204 return Res;
5205}
5206
5207/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5208/// vector. If it is invalid, don't add anything to Ops.
5209void AArch64TargetLowering::LowerAsmOperandForConstraint(
5210 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5211 SelectionDAG &DAG) const {
5212 SDValue Result;
5213
5214 // Currently only support length 1 constraints.
5215 if (Constraint.length() != 1)
5216 return;
5217
5218 char ConstraintLetter = Constraint[0];
5219 switch (ConstraintLetter) {
5220 default:
5221 break;
5222
5223 // This set of constraints deal with valid constants for various instructions.
5224 // Validate and return a target constant for them if we can.
5225 case 'z': {
5226 // 'z' maps to xzr or wzr so it needs an input of 0.
5227 if (!isNullConstant(Op))
5228 return;
5229
5230 if (Op.getValueType() == MVT::i64)
5231 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5232 else
5233 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5234 break;
5235 }
5236
5237 case 'I':
5238 case 'J':
5239 case 'K':
5240 case 'L':
5241 case 'M':
5242 case 'N':
5243 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5244 if (!C)
5245 return;
5246
5247 // Grab the value and do some validation.
5248 uint64_t CVal = C->getZExtValue();
5249 switch (ConstraintLetter) {
5250 // The I constraint applies only to simple ADD or SUB immediate operands:
5251 // i.e. 0 to 4095 with optional shift by 12
5252 // The J constraint applies only to ADD or SUB immediates that would be
5253 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5254 // instruction [or vice versa], in other words -1 to -4095 with optional
5255 // left shift by 12.
5256 case 'I':
5257 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5258 break;
5259 return;
5260 case 'J': {
5261 uint64_t NVal = -C->getSExtValue();
5262 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5263 CVal = C->getSExtValue();
5264 break;
5265 }
5266 return;
5267 }
5268 // The K and L constraints apply *only* to logical immediates, including
5269 // what used to be the MOVI alias for ORR (though the MOVI alias has now
5270 // been removed and MOV should be used). So these constraints have to
5271 // distinguish between bit patterns that are valid 32-bit or 64-bit
5272 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5273 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5274 // versa.
5275 case 'K':
5276 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5277 break;
5278 return;
5279 case 'L':
5280 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5281 break;
5282 return;
5283 // The M and N constraints are a superset of K and L respectively, for use
5284 // with the MOV (immediate) alias. As well as the logical immediates they
5285 // also match 32 or 64-bit immediates that can be loaded either using a
6286// *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5287 // (M) or 64-bit 0x1234000000000000 (N) etc.
5288 // As a note some of this code is liberally stolen from the asm parser.
5289 case 'M': {
5290 if (!isUInt<32>(CVal))
5291 return;
5292 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5293 break;
5294 if ((CVal & 0xFFFF) == CVal)
5295 break;
5296 if ((CVal & 0xFFFF0000ULL) == CVal)
5297 break;
5298 uint64_t NCVal = ~(uint32_t)CVal;
5299 if ((NCVal & 0xFFFFULL) == NCVal)
5300 break;
5301 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5302 break;
5303 return;
5304 }
5305 case 'N': {
5306 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5307 break;
5308 if ((CVal & 0xFFFFULL) == CVal)
5309 break;
5310 if ((CVal & 0xFFFF0000ULL) == CVal)
5311 break;
5312 if ((CVal & 0xFFFF00000000ULL) == CVal)
5313 break;
5314 if ((CVal & 0xFFFF000000000000ULL) == CVal)
5315 break;
5316 uint64_t NCVal = ~CVal;
5317 if ((NCVal & 0xFFFFULL) == NCVal)
5318 break;
5319 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5320 break;
5321 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5322 break;
5323 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5324 break;
5325 return;
5326 }
5327 default:
5328 return;
5329 }
5330
5331 // All assembler immediates are 64-bit integers.
5332 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5333 break;
5334 }
5335
5336 if (Result.getNode()) {
5337 Ops.push_back(Result);
5338 return;
5339 }
5340
5341 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5342}
5343
5344//===----------------------------------------------------------------------===//
5345// AArch64 Advanced SIMD Support
5346//===----------------------------------------------------------------------===//
5347
5348/// WidenVector - Given a value in the V64 register class, produce the
5349/// equivalent value in the V128 register class.
5350static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5351 EVT VT = V64Reg.getValueType();
5352 unsigned NarrowSize = VT.getVectorNumElements();
5353 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5354 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5355 SDLoc DL(V64Reg);
5356
5357 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5358 V64Reg, DAG.getConstant(0, DL, MVT::i32));
5359}
5360
5361/// getExtFactor - Determine the adjustment factor for the position when
5362/// generating an "extract from vector registers" instruction.
5363static unsigned getExtFactor(SDValue &V) {
5364 EVT EltType = V.getValueType().getVectorElementType();
5365 return EltType.getSizeInBits() / 8;
5366}
5367
5368/// NarrowVector - Given a value in the V128 register class, produce the
5369/// equivalent value in the V64 register class.
5370static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5371 EVT VT = V128Reg.getValueType();
5372 unsigned WideSize = VT.getVectorNumElements();
5373 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5374 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5375 SDLoc DL(V128Reg);
5376
5377 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5378}
5379
5380// Gather data to see if the operation can be modelled as a
5381// shuffle in combination with VEXTs.
5382SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5383 SelectionDAG &DAG) const {
5384  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5385  DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5386 SDLoc dl(Op);
5387 EVT VT = Op.getValueType();
5388 unsigned NumElts = VT.getVectorNumElements();
5389
5390 struct ShuffleSourceInfo {
5391 SDValue Vec;
5392 unsigned MinElt;
5393 unsigned MaxElt;
5394
5395 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5396 // be compatible with the shuffle we intend to construct. As a result
5397 // ShuffleVec will be some sliding window into the original Vec.
5398 SDValue ShuffleVec;
5399
5400    // Code should guarantee that element i in Vec starts at element
5401    // "WindowBase + i * WindowScale" in ShuffleVec.
5402 int WindowBase;
5403 int WindowScale;
5404
5405 ShuffleSourceInfo(SDValue Vec)
5406 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5407 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5408
5409 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5410 };
5411
5412 // First gather all vectors used as an immediate source for this BUILD_VECTOR
5413 // node.
5414 SmallVector<ShuffleSourceInfo, 2> Sources;
5415 for (unsigned i = 0; i < NumElts; ++i) {
5416 SDValue V = Op.getOperand(i);
5417 if (V.isUndef())
5418 continue;
5419 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5420 !isa<ConstantSDNode>(V.getOperand(1))) {
5421      DEBUG(dbgs() << "Reshuffle failed: "
5422                      "a shuffle can only come from building a vector from "
5423                      "various elements of other vectors, provided their "
5424                      "indices are constant\n");
5425 return SDValue();
5426 }
5427
5428 // Add this element source to the list if it's not already there.
5429 SDValue SourceVec = V.getOperand(0);
5430 auto Source = find(Sources, SourceVec);
5431 if (Source == Sources.end())
5432 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5433
5434 // Update the minimum and maximum lane number seen.
5435 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5436 Source->MinElt = std::min(Source->MinElt, EltNo);
5437 Source->MaxElt = std::max(Source->MaxElt, EltNo);
5438 }
5439
5440 if (Sources.size() > 2) {
5441    DEBUG(dbgs() << "Reshuffle failed: currently only do something sane when at "
5442                    "most two source vectors are involved\n");
5443 return SDValue();
5444 }
5445
5446 // Find out the smallest element size among result and two sources, and use
5447 // it as element size to build the shuffle_vector.
5448 EVT SmallestEltTy = VT.getVectorElementType();
5449 for (auto &Source : Sources) {
5450 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5451 if (SrcEltTy.bitsLT(SmallestEltTy)) {
5452 SmallestEltTy = SrcEltTy;
5453 }
5454 }
5455 unsigned ResMultiplier =
5456 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5457 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5458 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5459
5460 // If the source vector is too wide or too narrow, we may nevertheless be able
5461 // to construct a compatible shuffle either by concatenating it with UNDEF or
5462 // extracting a suitable range of elements.
5463 for (auto &Src : Sources) {
5464 EVT SrcVT = Src.ShuffleVec.getValueType();
5465
5466 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
5467 continue;
5468
5469 // This stage of the search produces a source with the same element type as
5470 // the original, but with a total width matching the BUILD_VECTOR output.
5471 EVT EltVT = SrcVT.getVectorElementType();
5472 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
5473 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
5474
5475 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
5476      assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
5477 // We can pad out the smaller vector for free, so if it's part of a
5478 // shuffle...
5479 Src.ShuffleVec =
5480 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
5481 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
5482 continue;
5483 }
5484
5485    assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
5486
5487 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
5488      DEBUG(dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
5489 return SDValue();
5490 }
5491
5492 if (Src.MinElt >= NumSrcElts) {
5493 // The extraction can just take the second half
5494 Src.ShuffleVec =
5495 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5496 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5497 Src.WindowBase = -NumSrcElts;
5498 } else if (Src.MaxElt < NumSrcElts) {
5499 // The extraction can just take the first half
5500 Src.ShuffleVec =
5501 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5502 DAG.getConstant(0, dl, MVT::i64));
5503 } else {
5504 // An actual VEXT is needed
5505 SDValue VEXTSrc1 =
5506 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5507 DAG.getConstant(0, dl, MVT::i64));
5508 SDValue VEXTSrc2 =
5509 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5510 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5511 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
5512
5513 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
5514 VEXTSrc2,
5515 DAG.getConstant(Imm, dl, MVT::i32));
5516 Src.WindowBase = -Src.MinElt;
5517 }
5518 }
5519
5520 // Another possible incompatibility occurs from the vector element types. We
5521 // can fix this by bitcasting the source vectors to the same type we intend
5522 // for the shuffle.
5523 for (auto &Src : Sources) {
5524 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
5525 if (SrcEltTy == SmallestEltTy)
5526 continue;
5527    assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
5528 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
5529 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
5530 Src.WindowBase *= Src.WindowScale;
5531 }
5532
5533 // Final sanity check before we try to actually produce a shuffle.
5534  DEBUG(
5535      for (auto Src : Sources)
5536        assert(Src.ShuffleVec.getValueType() == ShuffleVT);
5537  );
5538
5539 // The stars all align, our next step is to produce the mask for the shuffle.
5540 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
5541 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
5542 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
5543 SDValue Entry = Op.getOperand(i);
5544 if (Entry.isUndef())
5545 continue;
5546
5547 auto Src = find(Sources, Entry.getOperand(0));
5548 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
5549
5550 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
5551 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
5552 // segment.
5553 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
5554 int BitsDefined =
5555 std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
5556 int LanesDefined = BitsDefined / BitsPerShuffleLane;
5557
5558 // This source is expected to fill ResMultiplier lanes of the final shuffle,
5559 // starting at the appropriate offset.
5560 int *LaneMask = &Mask[i * ResMultiplier];
5561
5562 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
5563 ExtractBase += NumElts * (Src - Sources.begin());
5564 for (int j = 0; j < LanesDefined; ++j)
5565 LaneMask[j] = ExtractBase + j;
5566 }
5567
5568 // Final check before we try to produce nonsense...
5569 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
5570    DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
5571 return SDValue();
5572 }
5573
5574 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
5575 for (unsigned i = 0; i < Sources.size(); ++i)
5576 ShuffleOps[i] = Sources[i].ShuffleVec;
5577
5578 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
5579 ShuffleOps[1], Mask);
5580 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
5581
5582  DEBUG(
5583      dbgs() << "Reshuffle, creating node: ";
5584      Shuffle.dump();
5585      dbgs() << "Reshuffle, creating node: ";
5586      V.dump();
5587  );
5588
5589 return V;
5590}
5591
5592// Check if an EXT instruction can handle the shuffle mask when the
5593// vector sources of the shuffle are the same.
5594static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5595 unsigned NumElts = VT.getVectorNumElements();
5596
5597 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5598 if (M[0] < 0)
5599 return false;
5600
5601 Imm = M[0];
5602
5603 // If this is a VEXT shuffle, the immediate value is the index of the first
5604 // element. The other shuffle indices must be the successive elements after
5605 // the first one.
5606 unsigned ExpectedElt = Imm;
5607 for (unsigned i = 1; i < NumElts; ++i) {
5608 // Increment the expected index. If it wraps around, just follow it
5609 // back to index zero and keep going.
5610 ++ExpectedElt;
5611 if (ExpectedElt == NumElts)
5612 ExpectedElt = 0;
5613
5614 if (M[i] < 0)
5615 continue; // ignore UNDEF indices
5616 if (ExpectedElt != static_cast<unsigned>(M[i]))
5617 return false;
5618 }
5619
5620 return true;
5621}
5622
5623// Check if an EXT instruction can handle the shuffle mask when the
5624// vector sources of the shuffle are different.
5625static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
5626 unsigned &Imm) {
5627 // Look for the first non-undef element.
5628 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
5629
5630  // Benefit from APInt to handle overflow when calculating the expected element.
5631 unsigned NumElts = VT.getVectorNumElements();
5632 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
5633 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
5634 // The following shuffle indices must be the successive elements after the
5635 // first real element.
5636 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
5637 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
5638 if (FirstWrongElt != M.end())
5639 return false;
5640
5641 // The index of an EXT is the first element if it is not UNDEF.
5642 // Watch out for the beginning UNDEFs. The EXT index should be the expected
5643 // value of the first element. E.g.
5644 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
5645 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
5646 // ExpectedElt is the last mask index plus 1.
5647 Imm = ExpectedElt.getZExtValue();
5648
5649  // There are two different cases that require reversing the input vectors.
5650 // For example, for vector <4 x i32> we have the following cases,
5651 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
5652 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
5653 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
5654 // to reverse two input vectors.
5655 if (Imm < NumElts)
5656 ReverseEXT = true;
5657 else
5658 Imm -= NumElts;
5659
5660 return true;
5661}
5662
5663/// isREVMask - Check if a vector shuffle corresponds to a REV
5664/// instruction with the specified blocksize. (The order of the elements
5665/// within each block of the vector is reversed.)
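// Annotation, not in the original source: e.g. for v8i16 with BlockSize 32 the
// matching mask is <1, 0, 3, 2, 5, 4, 7, 6>, and for v16i8 with BlockSize 16 it
// is <1, 0, 3, 2, ...>.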
5666static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5667  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
5668         "Only possible block sizes for REV are: 16, 32, 64");
5669
5670 unsigned EltSz = VT.getScalarSizeInBits();
5671 if (EltSz == 64)
5672 return false;
5673
5674 unsigned NumElts = VT.getVectorNumElements();
5675 unsigned BlockElts = M[0] + 1;
5676 // If the first shuffle index is UNDEF, be optimistic.
5677 if (M[0] < 0)
5678 BlockElts = BlockSize / EltSz;
5679
5680 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5681 return false;
5682
5683 for (unsigned i = 0; i < NumElts; ++i) {
5684 if (M[i] < 0)
5685 continue; // ignore UNDEF indices
5686 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
5687 return false;
5688 }
5689
5690 return true;
5691}
5692
5693static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5694 unsigned NumElts = VT.getVectorNumElements();
5695 WhichResult = (M[0] == 0 ? 0 : 1);
5696 unsigned Idx = WhichResult * NumElts / 2;
5697 for (unsigned i = 0; i != NumElts; i += 2) {
5698 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5699 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
5700 return false;
5701 Idx += 1;
5702 }
5703
5704 return true;
5705}
5706
5707static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5708 unsigned NumElts = VT.getVectorNumElements();
5709 WhichResult = (M[0] == 0 ? 0 : 1);
5710 for (unsigned i = 0; i != NumElts; ++i) {
5711 if (M[i] < 0)
5712 continue; // ignore UNDEF indices
5713 if ((unsigned)M[i] != 2 * i + WhichResult)
5714 return false;
5715 }
5716
5717 return true;
5718}
5719
5720static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5721 unsigned NumElts = VT.getVectorNumElements();
5722 WhichResult = (M[0] == 0 ? 0 : 1);
5723 for (unsigned i = 0; i < NumElts; i += 2) {
5724 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5725 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
5726 return false;
5727 }
5728 return true;
5729}
5730
5731/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
5732/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5733/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5734static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5735 unsigned NumElts = VT.getVectorNumElements();
5736 WhichResult = (M[0] == 0 ? 0 : 1);
5737 unsigned Idx = WhichResult * NumElts / 2;
5738 for (unsigned i = 0; i != NumElts; i += 2) {
5739 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5740 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
5741 return false;
5742 Idx += 1;
5743 }
5744
5745 return true;
5746}
5747
5748/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
5749/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5750/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5751static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5752 unsigned Half = VT.getVectorNumElements() / 2;
5753 WhichResult = (M[0] == 0 ? 0 : 1);
5754 for (unsigned j = 0; j != 2; ++j) {
5755 unsigned Idx = WhichResult;
5756 for (unsigned i = 0; i != Half; ++i) {
5757 int MIdx = M[i + j * Half];
5758 if (MIdx >= 0 && (unsigned)MIdx != Idx)
5759 return false;
5760 Idx += 2;
5761 }
5762 }
5763
5764 return true;
5765}
5766
5767/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
5768/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5769/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5770static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5771 unsigned NumElts = VT.getVectorNumElements();
5772 WhichResult = (M[0] == 0 ? 0 : 1);
5773 for (unsigned i = 0; i < NumElts; i += 2) {
5774 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5775 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
5776 return false;
5777 }
5778 return true;
5779}
5780
5781static bool isINSMask(ArrayRef<int> M, int NumInputElements,
5782 bool &DstIsLeft, int &Anomaly) {
5783 if (M.size() != static_cast<size_t>(NumInputElements))
5784 return false;
5785
5786 int NumLHSMatch = 0, NumRHSMatch = 0;
5787 int LastLHSMismatch = -1, LastRHSMismatch = -1;
5788
5789 for (int i = 0; i < NumInputElements; ++i) {
5790 if (M[i] == -1) {
5791 ++NumLHSMatch;
5792 ++NumRHSMatch;
5793 continue;
5794 }
5795
5796 if (M[i] == i)
5797 ++NumLHSMatch;
5798 else
5799 LastLHSMismatch = i;
5800
5801 if (M[i] == i + NumInputElements)
5802 ++NumRHSMatch;
5803 else
5804 LastRHSMismatch = i;
5805 }
5806
5807 if (NumLHSMatch == NumInputElements - 1) {
5808 DstIsLeft = true;
5809 Anomaly = LastLHSMismatch;
5810 return true;
5811 } else if (NumRHSMatch == NumInputElements - 1) {
5812 DstIsLeft = false;
5813 Anomaly = LastRHSMismatch;
5814 return true;
5815 }
5816
5817 return false;
5818}
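// Annotation, not in the original source: e.g. the 4-element mask <0, 1, 6, 3>
// is accepted here with DstIsLeft == true and Anomaly == 2, i.e. a single INS
// of element 2 of the RHS into lane 2 of the LHS.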
5819
5820static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
5821 if (VT.getSizeInBits() != 128)
5822 return false;
5823
5824 unsigned NumElts = VT.getVectorNumElements();
5825
5826 for (int I = 0, E = NumElts / 2; I != E; I++) {
5827 if (Mask[I] != I)
5828 return false;
5829 }
5830
5831 int Offset = NumElts / 2;
5832 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
5833 if (Mask[I] != I + SplitLHS * Offset)
5834 return false;
5835 }
5836
5837 return true;
5838}
5839
5840static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
5841 SDLoc DL(Op);
5842 EVT VT = Op.getValueType();
5843 SDValue V0 = Op.getOperand(0);
5844 SDValue V1 = Op.getOperand(1);
5845 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5846
5847 if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
5848 VT.getVectorElementType() != V1.getValueType().getVectorElementType())
5849 return SDValue();
5850
5851 bool SplitV0 = V0.getValueSizeInBits() == 128;
5852
5853 if (!isConcatMask(Mask, VT, SplitV0))
5854 return SDValue();
5855
5856 EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
5857 VT.getVectorNumElements() / 2);
5858 if (SplitV0) {
5859 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
5860 DAG.getConstant(0, DL, MVT::i64));
5861 }
5862 if (V1.getValueSizeInBits() == 128) {
5863 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
5864 DAG.getConstant(0, DL, MVT::i64));
5865 }
5866 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
5867}
5868
5869/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5870/// the specified operations to build the shuffle.
5871static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5872 SDValue RHS, SelectionDAG &DAG,
5873 const SDLoc &dl) {
5874 unsigned OpNum = (PFEntry >> 26) & 0x0F;
5875 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
5876 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
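  // Annotation, not in the original source: each 32-bit table entry appears to
  // pack a cost in bits 31:30, an opcode in bits 29:26 and two 13-bit shuffle
  // IDs, each a 4-lane mask encoded in base 9 (digits 0-8, 8 meaning undef);
  // e.g. <0,1,2,3> encodes as ((0*9+1)*9+2)*9+3 == 102, the value checked in
  // the OP_COPY case below.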
5877
5878 enum {
5879 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5880 OP_VREV,
5881 OP_VDUP0,
5882 OP_VDUP1,
5883 OP_VDUP2,
5884 OP_VDUP3,
5885 OP_VEXT1,
5886 OP_VEXT2,
5887 OP_VEXT3,
5888 OP_VUZPL, // VUZP, left result
5889 OP_VUZPR, // VUZP, right result
5890 OP_VZIPL, // VZIP, left result
5891 OP_VZIPR, // VZIP, right result
5892 OP_VTRNL, // VTRN, left result
5893 OP_VTRNR // VTRN, right result
5894 };
5895
5896 if (OpNum == OP_COPY) {
5897 if (LHSID == (1 * 9 + 2) * 9 + 3)
5898 return LHS;
5899    assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
5900 return RHS;
5901 }
5902
5903 SDValue OpLHS, OpRHS;
5904 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5905 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5906 EVT VT = OpLHS.getValueType();
5907
5908 switch (OpNum) {
5909 default:
5910    llvm_unreachable("Unknown shuffle opcode!");
5911 case OP_VREV:
5912 // VREV divides the vector in half and swaps within the half.
5913 if (VT.getVectorElementType() == MVT::i32 ||
5914 VT.getVectorElementType() == MVT::f32)
5915 return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
5916 // vrev <4 x i16> -> REV32
5917 if (VT.getVectorElementType() == MVT::i16 ||
5918 VT.getVectorElementType() == MVT::f16)
5919 return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
5920 // vrev <4 x i8> -> REV16
5921    assert(VT.getVectorElementType() == MVT::i8);
5922 return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
5923 case OP_VDUP0:
5924 case OP_VDUP1:
5925 case OP_VDUP2:
5926 case OP_VDUP3: {
5927 EVT EltTy = VT.getVectorElementType();
5928 unsigned Opcode;
5929 if (EltTy == MVT::i8)
5930 Opcode = AArch64ISD::DUPLANE8;
5931 else if (EltTy == MVT::i16 || EltTy == MVT::f16)
5932 Opcode = AArch64ISD::DUPLANE16;
5933 else if (EltTy == MVT::i32 || EltTy == MVT::f32)
5934 Opcode = AArch64ISD::DUPLANE32;
5935 else if (EltTy == MVT::i64 || EltTy == MVT::f64)
5936 Opcode = AArch64ISD::DUPLANE64;
5937 else
5938      llvm_unreachable("Invalid vector element type?");
5939
5940 if (VT.getSizeInBits() == 64)
5941 OpLHS = WidenVector(OpLHS, DAG);
5942 SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
5943 return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
5944 }
5945 case OP_VEXT1:
5946 case OP_VEXT2:
5947 case OP_VEXT3: {
5948 unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
5949 return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
5950 DAG.getConstant(Imm, dl, MVT::i32));
5951 }
5952 case OP_VUZPL:
5953 return DAG.getNode(AArch64ISD::UZP1, dl, DAG.getVTList(VT, VT), OpLHS,
5954 OpRHS);
5955 case OP_VUZPR:
5956 return DAG.getNode(AArch64ISD::UZP2, dl, DAG.getVTList(VT, VT), OpLHS,
5957 OpRHS);
5958 case OP_VZIPL:
5959 return DAG.getNode(AArch64ISD::ZIP1, dl, DAG.getVTList(VT, VT), OpLHS,
5960 OpRHS);
5961 case OP_VZIPR:
5962 return DAG.getNode(AArch64ISD::ZIP2, dl, DAG.getVTList(VT, VT), OpLHS,
5963 OpRHS);
5964 case OP_VTRNL:
5965 return DAG.getNode(AArch64ISD::TRN1, dl, DAG.getVTList(VT, VT), OpLHS,
5966 OpRHS);
5967 case OP_VTRNR:
5968 return DAG.getNode(AArch64ISD::TRN2, dl, DAG.getVTList(VT, VT), OpLHS,
5969 OpRHS);
5970 }
5971}
5972
5973static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
5974 SelectionDAG &DAG) {
5975 // Check to see if we can use the TBL instruction.
5976 SDValue V1 = Op.getOperand(0);
5977 SDValue V2 = Op.getOperand(1);
5978 SDLoc DL(Op);
5979
5980 EVT EltVT = Op.getValueType().getVectorElementType();
5981 unsigned BytesPerElt = EltVT.getSizeInBits() / 8;
5982
5983 SmallVector<SDValue, 8> TBLMask;
5984 for (int Val : ShuffleMask) {
5985 for (unsigned Byte = 0; Byte < BytesPerElt; ++Byte) {
5986 unsigned Offset = Byte + Val * BytesPerElt;
5987 TBLMask.push_back(DAG.getConstant(Offset, DL, MVT::i32));
5988 }
5989 }
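  // Annotation, not in the original source: e.g. a v4i16 mask <1, 0, 3, 2> with
  // BytesPerElt == 2 expands to the byte indices <2, 3, 0, 1, 6, 7, 4, 5>
  // consumed by the TBL intrinsic below.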
5990
5991 MVT IndexVT = MVT::v8i8;
5992 unsigned IndexLen = 8;
5993 if (Op.getValueSizeInBits() == 128) {
5994 IndexVT = MVT::v16i8;
5995 IndexLen = 16;
5996 }
5997
5998 SDValue V1Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V1);
5999 SDValue V2Cst = DAG.getNode(ISD::BITCAST, DL, IndexVT, V2);
6000
6001 SDValue Shuffle;
6002 if (V2.getNode()->isUndef()) {
6003 if (IndexLen == 8)
6004 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V1Cst);
6005 Shuffle = DAG.getNode(
6006 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6007 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
6008 DAG.getBuildVector(IndexVT, DL,
6009 makeArrayRef(TBLMask.data(), IndexLen)));
6010 } else {
6011 if (IndexLen == 8) {
6012 V1Cst = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, V1Cst, V2Cst);
6013 Shuffle = DAG.getNode(
6014 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6015 DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), V1Cst,
6016 DAG.getBuildVector(IndexVT, DL,
6017 makeArrayRef(TBLMask.data(), IndexLen)));
6018 } else {
6019 // FIXME: We cannot, for the moment, emit a TBL2 instruction because we
6020 // cannot currently represent the register constraints on the input
6021 // table registers.
6022 // Shuffle = DAG.getNode(AArch64ISD::TBL2, DL, IndexVT, V1Cst, V2Cst,
6023 // DAG.getBuildVector(IndexVT, DL, &TBLMask[0],
6024 // IndexLen));
6025 Shuffle = DAG.getNode(
6026 ISD::INTRINSIC_WO_CHAIN, DL, IndexVT,
6027 DAG.getConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i32), V1Cst,
6028 V2Cst, DAG.getBuildVector(IndexVT, DL,
6029 makeArrayRef(TBLMask.data(), IndexLen)));
6030 }
6031 }
6032 return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Shuffle);
6033}
6034
6035static unsigned getDUPLANEOp(EVT EltType) {
6036 if (EltType == MVT::i8)
6037 return AArch64ISD::DUPLANE8;
6038 if (EltType == MVT::i16 || EltType == MVT::f16)
6039 return AArch64ISD::DUPLANE16;
6040 if (EltType == MVT::i32 || EltType == MVT::f32)
6041 return AArch64ISD::DUPLANE32;
6042 if (EltType == MVT::i64 || EltType == MVT::f64)
6043 return AArch64ISD::DUPLANE64;
6044
6045  llvm_unreachable("Invalid vector element type?");
6046}
6047
6048SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
6049 SelectionDAG &DAG) const {
6050 SDLoc dl(Op);
6051 EVT VT = Op.getValueType();
6052
6053 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6054
6055 // Convert shuffles that are directly supported on NEON to target-specific
6056 // DAG nodes, instead of keeping them as shuffles and matching them again
6057 // during code selection. This is more efficient and avoids the possibility
6058 // of inconsistencies between legalization and selection.
6059 ArrayRef<int> ShuffleMask = SVN->getMask();
6060
6061 SDValue V1 = Op.getOperand(0);
6062 SDValue V2 = Op.getOperand(1);
6063
6064 if (SVN->isSplat()) {
6065 int Lane = SVN->getSplatIndex();
6066 // If this is undef splat, generate it via "just" vdup, if possible.
6067 if (Lane == -1)
6068 Lane = 0;
6069
6070 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
6071 return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
6072 V1.getOperand(0));
6073 // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
6074 // constant. If so, we can just reference the lane's definition directly.
6075 if (V1.getOpcode() == ISD::BUILD_VECTOR &&
6076 !isa<ConstantSDNode>(V1.getOperand(Lane)))
6077 return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
6078
6079 // Otherwise, duplicate from the lane of the input vector.
6080 unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
6081
6082    // SelectionDAGBuilder may have "helpfully" already extracted or concatenated
6083 // to make a vector of the same size as this SHUFFLE. We can ignore the
6084 // extract entirely, and canonicalise the concat using WidenVector.
6085 if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
6086 Lane += cast<ConstantSDNode>(V1.getOperand(1))->getZExtValue();
6087 V1 = V1.getOperand(0);
6088 } else if (V1.getOpcode() == ISD::CONCAT_VECTORS) {
6089 unsigned Idx = Lane >= (int)VT.getVectorNumElements() / 2;
6090 Lane -= Idx * VT.getVectorNumElements() / 2;
6091 V1 = WidenVector(V1.getOperand(Idx), DAG);
6092 } else if (VT.getSizeInBits() == 64)
6093 V1 = WidenVector(V1, DAG);
6094
6095 return DAG.getNode(Opcode, dl, VT, V1, DAG.getConstant(Lane, dl, MVT::i64));
6096 }
6097
6098 if (isREVMask(ShuffleMask, VT, 64))
6099 return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1, V2);
6100 if (isREVMask(ShuffleMask, VT, 32))
6101 return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1, V2);
6102 if (isREVMask(ShuffleMask, VT, 16))
6103 return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1, V2);
6104
6105 bool ReverseEXT = false;
6106 unsigned Imm;
6107 if (isEXTMask(ShuffleMask, VT, ReverseEXT, Imm)) {
6108 if (ReverseEXT)
6109 std::swap(V1, V2);
6110 Imm *= getExtFactor(V1);
6111 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
6112 DAG.getConstant(Imm, dl, MVT::i32));
6113 } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
6114 Imm *= getExtFactor(V1);
6115 return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
6116 DAG.getConstant(Imm, dl, MVT::i32));
6117 }
6118
6119 unsigned WhichResult;
6120 if (isZIPMask(ShuffleMask, VT, WhichResult)) {
6121 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
6122 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6123 }
6124 if (isUZPMask(ShuffleMask, VT, WhichResult)) {
6125 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
6126 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6127 }
6128 if (isTRNMask(ShuffleMask, VT, WhichResult)) {
6129 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
6130 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
6131 }
6132
6133 if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6134 unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
6135 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6136 }
6137 if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6138 unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
6139 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6140 }
6141 if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
6142 unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
6143 return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
6144 }
6145
6146 if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
6147 return Concat;
6148
6149 bool DstIsLeft;
6150 int Anomaly;
6151 int NumInputElements = V1.getValueType().getVectorNumElements();
6152 if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
6153 SDValue DstVec = DstIsLeft ? V1 : V2;
6154 SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
6155
6156 SDValue SrcVec = V1;
6157 int SrcLane = ShuffleMask[Anomaly];
6158 if (SrcLane >= NumInputElements) {
6159 SrcVec = V2;
6160 SrcLane -= VT.getVectorNumElements();
6161 }
6162 SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
6163
6164 EVT ScalarVT = VT.getVectorElementType();
6165
6166 if (ScalarVT.getSizeInBits() < 32 && ScalarVT.isInteger())
6167 ScalarVT = MVT::i32;
6168
6169 return DAG.getNode(
6170 ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
6171 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
6172 DstLaneV);
6173 }
6174
6175 // If the shuffle is not directly supported and it has 4 elements, use
6176 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6177 unsigned NumElts = VT.getVectorNumElements();
6178 if (NumElts == 4) {
6179 unsigned PFIndexes[4];
6180 for (unsigned i = 0; i != 4; ++i) {
6181 if (ShuffleMask[i] < 0)
6182 PFIndexes[i] = 8;
6183 else
6184 PFIndexes[i] = ShuffleMask[i];
6185 }
6186
6187 // Compute the index in the perfect shuffle table.
6188 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
6189 PFIndexes[2] * 9 + PFIndexes[3];
6190 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6191 unsigned Cost = (PFEntry >> 30);
6192
6193 if (Cost <= 4)
6194 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6195 }
6196
6197 return GenerateTBL(Op, ShuffleMask, DAG);
6198}
6199
6200static bool resolveBuildVector(BuildVectorSDNode *BVN, APInt &CnstBits,
6201 APInt &UndefBits) {
6202 EVT VT = BVN->getValueType(0);
6203 APInt SplatBits, SplatUndef;
6204 unsigned SplatBitSize;
6205 bool HasAnyUndefs;
6206 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6207 unsigned NumSplats = VT.getSizeInBits() / SplatBitSize;
6208
6209 for (unsigned i = 0; i < NumSplats; ++i) {
6210 CnstBits <<= SplatBitSize;
6211 UndefBits <<= SplatBitSize;
6212 CnstBits |= SplatBits.zextOrTrunc(VT.getSizeInBits());
6213 UndefBits |= (SplatBits ^ SplatUndef).zextOrTrunc(VT.getSizeInBits());
6214 }
6215
6216 return true;
6217 }
6218
6219 return false;
6220}
6221
6222SDValue AArch64TargetLowering::LowerVectorAND(SDValue Op,
6223 SelectionDAG &DAG) const {
6224 BuildVectorSDNode *BVN =
6225 dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
6226 SDValue LHS = Op.getOperand(0);
6227 SDLoc dl(Op);
6228 EVT VT = Op.getValueType();
6229
6230 if (!BVN)
6231 return Op;
6232
6233 APInt CnstBits(VT.getSizeInBits(), 0);
6234 APInt UndefBits(VT.getSizeInBits(), 0);
6235 if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6236 // We only have BIC vector immediate instruction, which is and-not.
6237 CnstBits = ~CnstBits;
6238
6239 // We make use of a little bit of goto ickiness in order to avoid having to
6240 // duplicate the immediate matching logic for the undef toggled case.
6241 bool SecondTry = false;
6242 AttemptModImm:
6243
6244 if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6245 CnstBits = CnstBits.zextOrTrunc(64);
6246 uint64_t CnstVal = CnstBits.getZExtValue();
6247
6248 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6249 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6250 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6251 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6252 DAG.getConstant(CnstVal, dl, MVT::i32),
6253 DAG.getConstant(0, dl, MVT::i32));
6254 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6255 }
6256
6257 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6258 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6259 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6260 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6261 DAG.getConstant(CnstVal, dl, MVT::i32),
6262 DAG.getConstant(8, dl, MVT::i32));
6263 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6264 }
6265
6266 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6267 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6268 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6269 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6270 DAG.getConstant(CnstVal, dl, MVT::i32),
6271 DAG.getConstant(16, dl, MVT::i32));
6272 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6273 }
6274
6275 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6276 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6277 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6278 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6279 DAG.getConstant(CnstVal, dl, MVT::i32),
6280 DAG.getConstant(24, dl, MVT::i32));
6281 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6282 }
6283
6284 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6285 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6286 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6287 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6288 DAG.getConstant(CnstVal, dl, MVT::i32),
6289 DAG.getConstant(0, dl, MVT::i32));
6290 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6291 }
6292
6293 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6294 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6295 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6296 SDValue Mov = DAG.getNode(AArch64ISD::BICi, dl, MovTy, LHS,
6297 DAG.getConstant(CnstVal, dl, MVT::i32),
6298 DAG.getConstant(8, dl, MVT::i32));
6299 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6300 }
6301 }
6302
6303 if (SecondTry)
6304 goto FailedModImm;
6305 SecondTry = true;
6306 CnstBits = ~UndefBits;
6307 goto AttemptModImm;
6308 }
6309
6310// We can always fall back to a non-immediate AND.
6311FailedModImm:
6312 return Op;
6313}
6314
6315// Specialized code to quickly find if PotentialBVec is a BuildVector that
6316// consists only of the same constant int value, returned in the reference
6317// arg ConstVal.
6318static bool isAllConstantBuildVector(const SDValue &PotentialBVec,
6319 uint64_t &ConstVal) {
6320 BuildVectorSDNode *Bvec = dyn_cast<BuildVectorSDNode>(PotentialBVec);
6321 if (!Bvec)
6322 return false;
6323 ConstantSDNode *FirstElt = dyn_cast<ConstantSDNode>(Bvec->getOperand(0));
6324 if (!FirstElt)
6325 return false;
6326 EVT VT = Bvec->getValueType(0);
6327 unsigned NumElts = VT.getVectorNumElements();
6328 for (unsigned i = 1; i < NumElts; ++i)
6329 if (dyn_cast<ConstantSDNode>(Bvec->getOperand(i)) != FirstElt)
6330 return false;
6331 ConstVal = FirstElt->getZExtValue();
6332 return true;
6333}
6334
6335static unsigned getIntrinsicID(const SDNode *N) {
6336 unsigned Opcode = N->getOpcode();
6337 switch (Opcode) {
6338 default:
6339 return Intrinsic::not_intrinsic;
6340 case ISD::INTRINSIC_WO_CHAIN: {
6341 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
6342 if (IID < Intrinsic::num_intrinsics)
6343 return IID;
6344 return Intrinsic::not_intrinsic;
6345 }
6346 }
6347}
6348
6349// Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)),
6350// to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a
6351// BUILD_VECTOR with constant element C1, C2 is a constant, and C1 == ~C2.
6352// Also, logical shift right -> sri, with the same structure.
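// Concretely, (or (and X, BvecC1), (AArch64ISD::VSHL Y, C2)) is rewritten to
// the aarch64_neon_vsli intrinsic on X and Y with shift C2, and the VLSHR
// form to aarch64_neon_vsri, as built at the end of this function.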
6353static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
6354 EVT VT = N->getValueType(0);
6355
6356 if (!VT.isVector())
6357 return SDValue();
6358
6359 SDLoc DL(N);
6360
6361 // Is the first op an AND?
6362 const SDValue And = N->getOperand(0);
6363 if (And.getOpcode() != ISD::AND)
6364 return SDValue();
6365
6366 // Is the second op an shl or lshr?
6367 SDValue Shift = N->getOperand(1);
6368 // This will have been turned into: AArch64ISD::VSHL vector, #shift
6369 // or AArch64ISD::VLSHR vector, #shift
6370 unsigned ShiftOpc = Shift.getOpcode();
6371 if ((ShiftOpc != AArch64ISD::VSHL && ShiftOpc != AArch64ISD::VLSHR))
6372 return SDValue();
6373 bool IsShiftRight = ShiftOpc == AArch64ISD::VLSHR;
6374
6375 // Is the shift amount constant?
6376 ConstantSDNode *C2node = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
6377 if (!C2node)
6378 return SDValue();
6379
6380 // Is the and mask vector all constant?
6381 uint64_t C1;
6382 if (!isAllConstantBuildVector(And.getOperand(1), C1))
6383 return SDValue();
6384
6385 // Is C1 == ~C2, taking into account how much one can shift elements of a
6386 // particular size?
6387 uint64_t C2 = C2node->getZExtValue();
6388 unsigned ElemSizeInBits = VT.getScalarSizeInBits();
6389 if (C2 > ElemSizeInBits)
6390 return SDValue();
6391 unsigned ElemMask = (1 << ElemSizeInBits) - 1;
6392 if ((C1 & ElemMask) != (~C2 & ElemMask))
6393 return SDValue();
6394
6395 SDValue X = And.getOperand(0);
6396 SDValue Y = Shift.getOperand(0);
6397
6398 unsigned Intrin =
6399 IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli;
6400 SDValue ResultSLI =
6401 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
6402 DAG.getConstant(Intrin, DL, MVT::i32), X, Y,
6403 Shift.getOperand(1));
6404
6405  DEBUG(dbgs() << "aarch64-lower: transformed: \n");
6406  DEBUG(N->dump(&DAG));
6407  DEBUG(dbgs() << "into: \n");
6408  DEBUG(ResultSLI->dump(&DAG));
6409
6410 ++NumShiftInserts;
6411 return ResultSLI;
6412}
6413
6414SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
6415 SelectionDAG &DAG) const {
6416 // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2))
6417 if (EnableAArch64SlrGeneration) {
6418 if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG))
6419 return Res;
6420 }
6421
6422 BuildVectorSDNode *BVN =
6423 dyn_cast<BuildVectorSDNode>(Op.getOperand(0).getNode());
6424 SDValue LHS = Op.getOperand(1);
6425 SDLoc dl(Op);
6426 EVT VT = Op.getValueType();
6427
6428 // OR commutes, so try swapping the operands.
6429 if (!BVN) {
6430 LHS = Op.getOperand(0);
6431 BVN = dyn_cast<BuildVectorSDNode>(Op.getOperand(1).getNode());
6432 }
6433 if (!BVN)
6434 return Op;
6435
6436 APInt CnstBits(VT.getSizeInBits(), 0);
6437 APInt UndefBits(VT.getSizeInBits(), 0);
6438 if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6439 // We make use of a little bit of goto ickiness in order to avoid having to
6440 // duplicate the immediate matching logic for the undef toggled case.
6441 bool SecondTry = false;
6442 AttemptModImm:
6443
6444 if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6445 CnstBits = CnstBits.zextOrTrunc(64);
6446 uint64_t CnstVal = CnstBits.getZExtValue();
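      // As in LowerVectorAND above, try each ORR-immediate encoding. For
      // example, a splat of 0x0000ab00 in 32-bit lanes would be emitted as an
      // ORR immediate of 0xab with shift 8 (the isAdvSIMDModImmType2 case,
      // assuming the usual meaning of that modified-immediate form).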
6447
6448 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6449 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6450 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6451 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6452 DAG.getConstant(CnstVal, dl, MVT::i32),
6453 DAG.getConstant(0, dl, MVT::i32));
6454 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6455 }
6456
6457 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6458 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6459 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6460 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6461 DAG.getConstant(CnstVal, dl, MVT::i32),
6462 DAG.getConstant(8, dl, MVT::i32));
6463 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6464 }
6465
6466 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6467 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6468 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6469 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6470 DAG.getConstant(CnstVal, dl, MVT::i32),
6471 DAG.getConstant(16, dl, MVT::i32));
6472 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6473 }
6474
6475 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6476 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6477 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6478 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6479 DAG.getConstant(CnstVal, dl, MVT::i32),
6480 DAG.getConstant(24, dl, MVT::i32));
6481 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6482 }
6483
6484 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6485 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6486 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6487 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6488 DAG.getConstant(CnstVal, dl, MVT::i32),
6489 DAG.getConstant(0, dl, MVT::i32));
6490 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6491 }
6492
6493 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6494 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6495 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6496 SDValue Mov = DAG.getNode(AArch64ISD::ORRi, dl, MovTy, LHS,
6497 DAG.getConstant(CnstVal, dl, MVT::i32),
6498 DAG.getConstant(8, dl, MVT::i32));
6499 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6500 }
6501 }
6502
6503 if (SecondTry)
6504 goto FailedModImm;
6505 SecondTry = true;
6506 CnstBits = UndefBits;
6507 goto AttemptModImm;
6508 }
6509
6510// We can always fall back to a non-immediate OR.
6511FailedModImm:
6512 return Op;
6513}
6514
6515// Normalize the operands of BUILD_VECTOR. The value of constant operands will
6516// be truncated to fit element width.
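// For example, in a v8i8 build_vector a lane constant of 0x1ff is replaced by
// an i32 constant holding only its low 8 bits (0xff).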
6517static SDValue NormalizeBuildVector(SDValue Op,
6518 SelectionDAG &DAG) {
6519  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6520 SDLoc dl(Op);
6521 EVT VT = Op.getValueType();
6522 EVT EltTy= VT.getVectorElementType();
6523
6524 if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16)
6525 return Op;
6526
6527 SmallVector<SDValue, 16> Ops;
6528 for (SDValue Lane : Op->ops()) {
6529 if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
6530 APInt LowBits(EltTy.getSizeInBits(),
6531 CstLane->getZExtValue());
6532 Lane = DAG.getConstant(LowBits.getZExtValue(), dl, MVT::i32);
6533 }
6534 Ops.push_back(Lane);
6535 }
6536 return DAG.getBuildVector(VT, dl, Ops);
6537}
6538
6539SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
6540 SelectionDAG &DAG) const {
6541 SDLoc dl(Op);
6542 EVT VT = Op.getValueType();
6543 Op = NormalizeBuildVector(Op, DAG);
6544 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6545
6546 APInt CnstBits(VT.getSizeInBits(), 0);
6547 APInt UndefBits(VT.getSizeInBits(), 0);
6548 if (resolveBuildVector(BVN, CnstBits, UndefBits)) {
6549 // We make use of a little bit of goto ickiness in order to avoid having to
6550 // duplicate the immediate matching logic for the undef toggled case.
6551 bool SecondTry = false;
6552 AttemptModImm:
6553
6554 if (CnstBits.getHiBits(64) == CnstBits.getLoBits(64)) {
6555 CnstBits = CnstBits.zextOrTrunc(64);
6556 uint64_t CnstVal = CnstBits.getZExtValue();
6557
6558 // Certain magic vector constants (used to express things like NOT
6559 // and NEG) are passed through unmodified. This allows codegen patterns
6560 // for these operations to match. Special-purpose patterns will lower
6561 // these immediates to MOVIs if it proves necessary.
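      // For example, an all-zeros or all-ones integer splat (CnstVal == 0 or
      // ~0ULL) is returned untouched so that NOT/NEG selection patterns can
      // still see it.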
6562 if (VT.isInteger() && (CnstVal == 0 || CnstVal == ~0ULL))
6563 return Op;
6564
6565 // The many faces of MOVI...
6566 if (AArch64_AM::isAdvSIMDModImmType10(CnstVal)) {
6567 CnstVal = AArch64_AM::encodeAdvSIMDModImmType10(CnstVal);
6568 if (VT.getSizeInBits() == 128) {
6569 SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::v2i64,
6570 DAG.getConstant(CnstVal, dl, MVT::i32));
6571 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6572 }
6573
6574 // Support the V64 version via subregister insertion.
6575 SDValue Mov = DAG.getNode(AArch64ISD::MOVIedit, dl, MVT::f64,
6576 DAG.getConstant(CnstVal, dl, MVT::i32));
6577 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6578 }
6579
6580 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6581 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6582 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6583 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6584 DAG.getConstant(CnstVal, dl, MVT::i32),
6585 DAG.getConstant(0, dl, MVT::i32));
6586 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6587 }
6588
6589 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6590 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6591 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6592 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6593 DAG.getConstant(CnstVal, dl, MVT::i32),
6594 DAG.getConstant(8, dl, MVT::i32));
6595 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6596 }
6597
6598 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6599 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6600 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6601 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6602 DAG.getConstant(CnstVal, dl, MVT::i32),
6603 DAG.getConstant(16, dl, MVT::i32));
6604 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6605 }
6606
6607 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6608 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6609 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6610 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6611 DAG.getConstant(CnstVal, dl, MVT::i32),
6612 DAG.getConstant(24, dl, MVT::i32));
6613 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6614 }
6615
6616 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6617 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6618 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6619 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6620 DAG.getConstant(CnstVal, dl, MVT::i32),
6621 DAG.getConstant(0, dl, MVT::i32));
6622 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6623 }
6624
6625 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6626 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6627 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6628 SDValue Mov = DAG.getNode(AArch64ISD::MOVIshift, dl, MovTy,
6629 DAG.getConstant(CnstVal, dl, MVT::i32),
6630 DAG.getConstant(8, dl, MVT::i32));
6631 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6632 }
6633
6634 if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
6635 CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
6636 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6637 SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
6638 DAG.getConstant(CnstVal, dl, MVT::i32),
6639 DAG.getConstant(264, dl, MVT::i32));
6640 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6641 }
6642
6643 if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
6644 CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
6645 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6646 SDValue Mov = DAG.getNode(AArch64ISD::MOVImsl, dl, MovTy,
6647 DAG.getConstant(CnstVal, dl, MVT::i32),
6648 DAG.getConstant(272, dl, MVT::i32));
6649 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6650 }
6651
6652 if (AArch64_AM::isAdvSIMDModImmType9(CnstVal)) {
6653 CnstVal = AArch64_AM::encodeAdvSIMDModImmType9(CnstVal);
6654 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v16i8 : MVT::v8i8;
6655 SDValue Mov = DAG.getNode(AArch64ISD::MOVI, dl, MovTy,
6656 DAG.getConstant(CnstVal, dl, MVT::i32));
6657 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6658 }
6659
6660 // The few faces of FMOV...
6661 if (AArch64_AM::isAdvSIMDModImmType11(CnstVal)) {
6662 CnstVal = AArch64_AM::encodeAdvSIMDModImmType11(CnstVal);
6663 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4f32 : MVT::v2f32;
6664 SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MovTy,
6665 DAG.getConstant(CnstVal, dl, MVT::i32));
6666 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6667 }
6668
6669 if (AArch64_AM::isAdvSIMDModImmType12(CnstVal) &&
6670 VT.getSizeInBits() == 128) {
6671 CnstVal = AArch64_AM::encodeAdvSIMDModImmType12(CnstVal);
6672 SDValue Mov = DAG.getNode(AArch64ISD::FMOV, dl, MVT::v2f64,
6673 DAG.getConstant(CnstVal, dl, MVT::i32));
6674 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6675 }
6676
6677 // The many faces of MVNI...
6678 CnstVal = ~CnstVal;
6679 if (AArch64_AM::isAdvSIMDModImmType1(CnstVal)) {
6680 CnstVal = AArch64_AM::encodeAdvSIMDModImmType1(CnstVal);
6681 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6682 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6683 DAG.getConstant(CnstVal, dl, MVT::i32),
6684 DAG.getConstant(0, dl, MVT::i32));
6685 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6686 }
6687
6688 if (AArch64_AM::isAdvSIMDModImmType2(CnstVal)) {
6689 CnstVal = AArch64_AM::encodeAdvSIMDModImmType2(CnstVal);
6690 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6691 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6692 DAG.getConstant(CnstVal, dl, MVT::i32),
6693 DAG.getConstant(8, dl, MVT::i32));
6694 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6695 }
6696
6697 if (AArch64_AM::isAdvSIMDModImmType3(CnstVal)) {
6698 CnstVal = AArch64_AM::encodeAdvSIMDModImmType3(CnstVal);
6699 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6700 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6701 DAG.getConstant(CnstVal, dl, MVT::i32),
6702 DAG.getConstant(16, dl, MVT::i32));
6703 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6704 }
6705
6706 if (AArch64_AM::isAdvSIMDModImmType4(CnstVal)) {
6707 CnstVal = AArch64_AM::encodeAdvSIMDModImmType4(CnstVal);
6708 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6709 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6710 DAG.getConstant(CnstVal, dl, MVT::i32),
6711 DAG.getConstant(24, dl, MVT::i32));
6712 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6713 }
6714
6715 if (AArch64_AM::isAdvSIMDModImmType5(CnstVal)) {
6716 CnstVal = AArch64_AM::encodeAdvSIMDModImmType5(CnstVal);
6717 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6718 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6719 DAG.getConstant(CnstVal, dl, MVT::i32),
6720 DAG.getConstant(0, dl, MVT::i32));
6721 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6722 }
6723
6724 if (AArch64_AM::isAdvSIMDModImmType6(CnstVal)) {
6725 CnstVal = AArch64_AM::encodeAdvSIMDModImmType6(CnstVal);
6726 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v8i16 : MVT::v4i16;
6727 SDValue Mov = DAG.getNode(AArch64ISD::MVNIshift, dl, MovTy,
6728 DAG.getConstant(CnstVal, dl, MVT::i32),
6729 DAG.getConstant(8, dl, MVT::i32));
6730 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6731 }
6732
6733 if (AArch64_AM::isAdvSIMDModImmType7(CnstVal)) {
6734 CnstVal = AArch64_AM::encodeAdvSIMDModImmType7(CnstVal);
6735 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6736 SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
6737 DAG.getConstant(CnstVal, dl, MVT::i32),
6738 DAG.getConstant(264, dl, MVT::i32));
6739 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6740 }
6741
6742 if (AArch64_AM::isAdvSIMDModImmType8(CnstVal)) {
6743 CnstVal = AArch64_AM::encodeAdvSIMDModImmType8(CnstVal);
6744 MVT MovTy = (VT.getSizeInBits() == 128) ? MVT::v4i32 : MVT::v2i32;
6745 SDValue Mov = DAG.getNode(AArch64ISD::MVNImsl, dl, MovTy,
6746 DAG.getConstant(CnstVal, dl, MVT::i32),
6747 DAG.getConstant(272, dl, MVT::i32));
6748 return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
6749 }
6750 }
6751
6752 if (SecondTry)
6753 goto FailedModImm;
6754 SecondTry = true;
6755 CnstBits = UndefBits;
6756 goto AttemptModImm;
6757 }
6758FailedModImm:
6759
6760 // Scan through the operands to find some interesting properties we can
6761 // exploit:
6762 // 1) If only one value is used, we can use a DUP, or
6763 // 2) if only the low element is not undef, we can just insert that, or
6764 // 3) if only one constant value is used (w/ some non-constant lanes),
6765 // we can splat the constant value into the whole vector then fill
6766 // in the non-constant lanes.
6767 // 4) FIXME: If different constant values are used, but we can intelligently
6768 // select the values we'll be overwriting for the non-constant
6769 // lanes such that we can directly materialize the vector
6770 // some other way (MOVI, e.g.), we can be sneaky.
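  // For example (case 3 above), <i32 7, i32 7, i32 %x, i32 7> is lowered as a
  // DUP of 7 followed by an INSERT_VECTOR_ELT of %x into lane 2.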
6771 unsigned NumElts = VT.getVectorNumElements();
6772 bool isOnlyLowElement = true;
6773 bool usesOnlyOneValue = true;
6774 bool usesOnlyOneConstantValue = true;
6775 bool isConstant = true;
6776 unsigned NumConstantLanes = 0;
6777 SDValue Value;
6778 SDValue ConstantValue;
6779 for (unsigned i = 0; i < NumElts; ++i) {
6780 SDValue V = Op.getOperand(i);
6781 if (V.isUndef())
6782 continue;
6783 if (i > 0)
6784 isOnlyLowElement = false;
6785 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6786 isConstant = false;
6787
6788 if (isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V)) {
6789 ++NumConstantLanes;
6790 if (!ConstantValue.getNode())
6791 ConstantValue = V;
6792 else if (ConstantValue != V)
6793 usesOnlyOneConstantValue = false;
6794 }
6795
6796 if (!Value.getNode())
6797 Value = V;
6798 else if (V != Value)
6799 usesOnlyOneValue = false;
6800 }
6801
6802 if (!Value.getNode()) {
6803    DEBUG(dbgs() << "LowerBUILD_VECTOR: value undefined, creating undef node\n");
6804 return DAG.getUNDEF(VT);
6805 }
6806
6807 if (isOnlyLowElement) {
6808    DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
6809                    "SCALAR_TO_VECTOR node\n");
6810 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6811 }
6812
6813 // Use DUP for non-constant splats. For f32 constant splats, reduce to
6814 // i32 and try again.
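  // For example, a <4 x float> built from the same non-constant value %f in
  // every lane becomes a single DUP of %f.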
6815 if (usesOnlyOneValue) {
6816 if (!isConstant) {
6817 if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6818 Value.getValueType() != VT) {
6819        DEBUG(dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
6820 return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
6821 }
6822
6823 // This is actually a DUPLANExx operation, which keeps everything vectory.
6824
6825 SDValue Lane = Value.getOperand(1);
6826 Value = Value.getOperand(0);
6827 if (Value.getValueSizeInBits() == 64) {
6828        DEBUG(dbgs() << "LowerBUILD_VECTOR: DUPLANE works on 128-bit vectors, "
6829                        "widening it\n");
6830 Value = WidenVector(Value, DAG);
6831 }
6832
6833 unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
6834 return DAG.getNode(Opcode, dl, VT, Value, Lane);
6835 }
6836
6837 if (VT.getVectorElementType().isFloatingPoint()) {
6838 SmallVector<SDValue, 8> Ops;
6839 EVT EltTy = VT.getVectorElementType();
6840      assert ((EltTy == MVT::f16 || EltTy == MVT::f32 || EltTy == MVT::f64) &&
6841              "Unsupported floating-point vector type");
6842      DEBUG(dbgs() << "LowerBUILD_VECTOR: float constant splats, creating int "
6843                      "BITCASTS, and try again\n");
6844 MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
6845 for (unsigned i = 0; i < NumElts; ++i)
6846 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
6847 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
6848 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6849      DEBUG(
6850          dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
6851          Val.dump();
6852      );
6853 Val = LowerBUILD_VECTOR(Val, DAG);
6854 if (Val.getNode())
6855 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6856 }
6857 }
6858
6859  // If only one constant value was used, across more than one lane,
6860 // start by splatting that value, then replace the non-constant lanes. This
6861 // is better than the default, which will perform a separate initialization
6862 // for each lane.
6863 if (NumConstantLanes > 0 && usesOnlyOneConstantValue) {
6864 SDValue Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
6865 // Now insert the non-constant lanes.
6866 for (unsigned i = 0; i < NumElts; ++i) {
6867 SDValue V = Op.getOperand(i);
6868 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
6869 if (!isa<ConstantSDNode>(V) && !isa<ConstantFPSDNode>(V)) {
6870 // Note that type legalization likely mucked about with the VT of the
6871 // source operand, so we may have to convert it here before inserting.
6872 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
6873 }
6874 }
6875 return Val;
6876 }
6877
6878 // This will generate a load from the constant pool.
6879 if (isConstant) {
6880    DEBUG(dbgs() << "LowerBUILD_VECTOR: all elements are constant, use default "
6881                    "expansion\n");
6882 return SDValue();
6883 }
6884
6885 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6886 if (NumElts >= 4) {
6887 if (SDValue shuffle = ReconstructShuffle(Op, DAG))
6888 return shuffle;
6889 }
6890
6891 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6892 // know the default expansion would otherwise fall back on something even
6893 // worse. For a vector with one or two non-undef values, that's
6894 // scalar_to_vector for the elements followed by a shuffle (provided the
6895 // shuffle is valid for the target) and materialization element by element
6896 // on the stack followed by a load for everything else.
6897 if (!isConstant && !usesOnlyOneValue) {
6898    DEBUG(dbgs() << "LowerBUILD_VECTOR: alternatives failed, creating sequence "
6899                    "of INSERT_VECTOR_ELT\n");
6900
6901 SDValue Vec = DAG.getUNDEF(VT);
6902 SDValue Op0 = Op.getOperand(0);
6903 unsigned i = 0;
6904
6905 // Use SCALAR_TO_VECTOR for lane zero to
6906 // a) Avoid a RMW dependency on the full vector register, and
6907 // b) Allow the register coalescer to fold away the copy if the
6908 // value is already in an S or D register, and we're forced to emit an
6909 // INSERT_SUBREG that we can't fold anywhere.
6910 //
6911 // We also allow types like i8 and i16 which are illegal scalar but legal
6912 // vector element types. After type-legalization the inserted value is
6913 // extended (i32) and it is safe to cast them to the vector type by ignoring
6914 // the upper bits of the lowest lane (e.g. v8i8, v4i16).
6915 if (!Op0.isUndef()) {
6916      DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
6917 Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
6918 ++i;
6919 }
6920    DEBUG(
6921        if (i < NumElts)
6922          dbgs() << "Creating nodes for the other vector elements:\n";
6923    );
6924 for (; i < NumElts; ++i) {
6925 SDValue V = Op.getOperand(i);
6926 if (V.isUndef())
6927 continue;
6928 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
6929 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6930 }
6931 return Vec;
6932 }
6933
6934  DEBUG(dbgs() << "LowerBUILD_VECTOR: use default expansion, failed to find "
6935                  "better alternative\n");
6936 return SDValue();
6937}
6938
6939SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
6940 SelectionDAG &DAG) const {
6941  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
6942
6943 // Check for non-constant or out of range lane.
6944 EVT VT = Op.getOperand(0).getValueType();
6945 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(2));
6946 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6947 return SDValue();
6948
6949
6950 // Insertion/extraction are legal for V128 types.
6951 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
6952 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
6953 VT == MVT::v8f16)
6954 return Op;
6955
6956 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
6957 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
6958 return SDValue();
6959
6960 // For V64 types, we perform insertion by expanding the value
6961  // to a V128 type and performing the insertion on that.
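  // For example, an insert into lane 1 of a v4i16 value is done as an insert
  // into lane 1 of the widened v8i16 value, after which NarrowVector takes
  // the low half back.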
6962 SDLoc DL(Op);
6963 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
6964 EVT WideTy = WideVec.getValueType();
6965
6966 SDValue Node = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideTy, WideVec,
6967 Op.getOperand(1), Op.getOperand(2));
6968 // Re-narrow the resultant vector.
6969 return NarrowVector(Node, DAG);
6970}
6971
6972SDValue
6973AArch64TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
6974 SelectionDAG &DAG) const {
6975  assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
6976
6977 // Check for non-constant or out of range lane.
6978 EVT VT = Op.getOperand(0).getValueType();
6979 ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Op.getOperand(1));
6980 if (!CI || CI->getZExtValue() >= VT.getVectorNumElements())
6981 return SDValue();
6982
6983
6984 // Insertion/extraction are legal for V128 types.
6985 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
6986 VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64 ||
6987 VT == MVT::v8f16)
6988 return Op;
6989
6990 if (VT != MVT::v8i8 && VT != MVT::v4i16 && VT != MVT::v2i32 &&
6991 VT != MVT::v1i64 && VT != MVT::v2f32 && VT != MVT::v4f16)
6992 return SDValue();
6993
6994 // For V64 types, we perform extraction by expanding the value
6995  // to a V128 type and performing the extraction on that.
6996 SDLoc DL(Op);
6997 SDValue WideVec = WidenVector(Op.getOperand(0), DAG);
6998 EVT WideTy = WideVec.getValueType();
6999
7000 EVT ExtrTy = WideTy.getVectorElementType();
7001 if (ExtrTy == MVT::i16 || ExtrTy == MVT::i8)
7002 ExtrTy = MVT::i32;
7003
7004 // For extractions, we just return the result directly.
7005 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtrTy, WideVec,
7006 Op.getOperand(1));
7007}
7008
7009SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
7010 SelectionDAG &DAG) const {
7011 EVT VT = Op.getOperand(0).getValueType();
7012 SDLoc dl(Op);
7013 // Just in case...
7014 if (!VT.isVector())
7015 return SDValue();
7016
7017 ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
7018 if (!Cst)
7019 return SDValue();
7020 unsigned Val = Cst->getZExtValue();
7021
7022 unsigned Size = Op.getValueSizeInBits();
7023
7024 // This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
7025 if (Val == 0)
7026 return Op;
7027
7028 // If this is extracting the upper 64-bits of a 128-bit vector, we match
7029 // that directly.
7030 if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
7031 return Op;
7032
7033 return SDValue();
7034}
7035
7036bool AArch64TargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
7037 if (VT.getVectorNumElements() == 4 &&
7038 (VT.is128BitVector() || VT.is64BitVector())) {
7039 unsigned PFIndexes[4];
7040 for (unsigned i = 0; i != 4; ++i) {
7041 if (M[i] < 0)
7042 PFIndexes[i] = 8;
7043 else
7044 PFIndexes[i] = M[i];
7045 }
7046
7047 // Compute the index in the perfect shuffle table.
7048 unsigned PFTableIndex = PFIndexes[0] * 9 * 9 * 9 + PFIndexes[1] * 9 * 9 +
7049 PFIndexes[2] * 9 + PFIndexes[3];
7050 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
7051 unsigned Cost = (PFEntry >> 30);
7052
7053 if (Cost <= 4)
7054 return true;
7055 }
7056
7057 bool DummyBool;
7058 int DummyInt;
7059 unsigned DummyUnsigned;
7060
7061 return (ShuffleVectorSDNode::isSplatMask(&M[0], VT) || isREVMask(M, VT, 64) ||
7062 isREVMask(M, VT, 32) || isREVMask(M, VT, 16) ||
7063 isEXTMask(M, VT, DummyBool, DummyUnsigned) ||
7064 // isTBLMask(M, VT) || // FIXME: Port TBL support from ARM.
7065 isTRNMask(M, VT, DummyUnsigned) || isUZPMask(M, VT, DummyUnsigned) ||
7066 isZIPMask(M, VT, DummyUnsigned) ||
7067 isTRN_v_undef_Mask(M, VT, DummyUnsigned) ||
7068 isUZP_v_undef_Mask(M, VT, DummyUnsigned) ||
7069 isZIP_v_undef_Mask(M, VT, DummyUnsigned) ||
7070 isINSMask(M, VT.getVectorNumElements(), DummyBool, DummyInt) ||
7071 isConcatMask(M, VT, VT.getSizeInBits() == 128));
7072}
7073
7074/// getVShiftImm - Check if this is a valid build_vector for the immediate
7075/// operand of a vector shift operation, where all the elements of the
7076/// build_vector must have the same constant integer value.
7077static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
7078 // Ignore bit_converts.
7079 while (Op.getOpcode() == ISD::BITCAST)
7080 Op = Op.getOperand(0);
7081 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
7082 APInt SplatBits, SplatUndef;
7083 unsigned SplatBitSize;
7084 bool HasAnyUndefs;
7085 if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
7086 HasAnyUndefs, ElementBits) ||
7087 SplatBitSize > ElementBits)
7088 return false;
7089 Cnt = SplatBits.getSExtValue();
7090 return true;
7091}
7092
7093/// isVShiftLImm - Check if this is a valid build_vector for the immediate
7094/// operand of a vector shift left operation. That value must be in the range:
7095/// 0 <= Value < ElementBits for a left shift; or
7096/// 0 <= Value <= ElementBits for a long left shift.
7097static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
7098  assert(VT.isVector() && "vector shift count is not a vector type");
7099 int64_t ElementBits = VT.getScalarSizeInBits();
7100 if (!getVShiftImm(Op, ElementBits, Cnt))
7101 return false;
7102 return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
7103}
7104
7105/// isVShiftRImm - Check if this is a valid build_vector for the immediate
7106/// operand of a vector shift right operation. The value must be in the range:
7107/// 1 <= Value <= ElementBits for a right shift; or 1 <= Value <= ElementBits/2 for a narrowing right shift.
7108static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
7109  assert(VT.isVector() && "vector shift count is not a vector type");
7110 int64_t ElementBits = VT.getScalarSizeInBits();
7111 if (!getVShiftImm(Op, ElementBits, Cnt))
7112 return false;
7113 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
7114}
7115
7116SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
7117 SelectionDAG &DAG) const {
7118 EVT VT = Op.getValueType();
7119 SDLoc DL(Op);
7120 int64_t Cnt;
7121
7122 if (!Op.getOperand(1).getValueType().isVector())
7123 return Op;
7124 unsigned EltSize = VT.getScalarSizeInBits();
7125
7126 switch (Op.getOpcode()) {
7127 default:
7128    llvm_unreachable("unexpected shift opcode");
7129
7130 case ISD::SHL:
7131 if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
7132 return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
7133 DAG.getConstant(Cnt, DL, MVT::i32));
7134 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
7135 DAG.getConstant(Intrinsic::aarch64_neon_ushl, DL,
7136 MVT::i32),
7137 Op.getOperand(0), Op.getOperand(1));
7138 case ISD::SRA:
7139 case ISD::SRL:
7140 // Right shift immediate
7141 if (isVShiftRImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize) {
7142 unsigned Opc =
7143 (Op.getOpcode() == ISD::SRA) ? AArch64ISD::VASHR : AArch64ISD::VLSHR;
7144 return DAG.getNode(Opc, DL, VT, Op.getOperand(0),
7145 DAG.getConstant(Cnt, DL, MVT::i32));
7146 }
7147
7148 // Right shift register. Note, there is not a shift right register
7149 // instruction, but the shift left register instruction takes a signed
7150 // value, where negative numbers specify a right shift.
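    // For example, (srl X, Y) becomes ushl(X, neg(Y)) and (sra X, Y) becomes
    // sshl(X, neg(Y)), which is exactly what is constructed below.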
7151 unsigned Opc = (Op.getOpcode() == ISD::SRA) ? Intrinsic::aarch64_neon_sshl
7152 : Intrinsic::aarch64_neon_ushl;
7153 // negate the shift amount
7154 SDValue NegShift = DAG.getNode(AArch64ISD::NEG, DL, VT, Op.getOperand(1));
7155 SDValue NegShiftLeft =
7156 DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT,
7157 DAG.getConstant(Opc, DL, MVT::i32), Op.getOperand(0),
7158 NegShift);
7159 return NegShiftLeft;
7160 }
7161
7162 return SDValue();
7163}
7164
7165static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
7166 AArch64CC::CondCode CC, bool NoNans, EVT VT,
7167 const SDLoc &dl, SelectionDAG &DAG) {
7168 EVT SrcVT = LHS.getValueType();
7169  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
7170         "function only supposed to emit natural comparisons");
7171
7172 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
7173 APInt CnstBits(VT.getSizeInBits(), 0);
7174 APInt UndefBits(VT.getSizeInBits(), 0);
7175 bool IsCnst = BVN && resolveBuildVector(BVN, CnstBits, UndefBits);
7176 bool IsZero = IsCnst && (CnstBits == 0);
7177
7178 if (SrcVT.getVectorElementType().isFloatingPoint()) {
7179 switch (CC) {
7180 default:
7181 return SDValue();
7182 case AArch64CC::NE: {
7183 SDValue Fcmeq;
7184 if (IsZero)
7185 Fcmeq = DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
7186 else
7187 Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
7188 return DAG.getNode(AArch64ISD::NOT, dl, VT, Fcmeq);
7189 }
7190 case AArch64CC::EQ:
7191 if (IsZero)
7192 return DAG.getNode(AArch64ISD::FCMEQz, dl, VT, LHS);
7193 return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
7194 case AArch64CC::GE:
7195 if (IsZero)
7196 return DAG.getNode(AArch64ISD::FCMGEz, dl, VT, LHS);
7197 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
7198 case AArch64CC::GT:
7199 if (IsZero)
7200 return DAG.getNode(AArch64ISD::FCMGTz, dl, VT, LHS);
7201 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
7202 case AArch64CC::LS:
7203 if (IsZero)
7204 return DAG.getNode(AArch64ISD::FCMLEz, dl, VT, LHS);
7205 return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
7206 case AArch64CC::LT:
7207 if (!NoNans)
7208 return SDValue();
7209      // If we ignore NaNs then we can use the MI implementation.
7210      LLVM_FALLTHROUGH;
7211 case AArch64CC::MI:
7212 if (IsZero)
7213 return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
7214 return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
7215 }
7216 }
7217
7218 switch (CC) {
7219 default:
7220 return SDValue();
7221 case AArch64CC::NE: {
7222 SDValue Cmeq;
7223 if (IsZero)
7224 Cmeq = DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
7225 else
7226 Cmeq = DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
7227 return DAG.getNode(AArch64ISD::NOT, dl, VT, Cmeq);
7228 }
7229 case AArch64CC::EQ:
7230 if (IsZero)
7231 return DAG.getNode(AArch64ISD::CMEQz, dl, VT, LHS);
7232 return DAG.getNode(AArch64ISD::CMEQ, dl, VT, LHS, RHS);
7233 case AArch64CC::GE:
7234 if (IsZero)
7235 return DAG.getNode(AArch64ISD::CMGEz, dl, VT, LHS);
7236 return DAG.getNode(AArch64ISD::CMGE, dl, VT, LHS, RHS);
7237 case AArch64CC::GT:
7238 if (IsZero)
7239 return DAG.getNode(AArch64ISD::CMGTz, dl, VT, LHS);
7240 return DAG.getNode(AArch64ISD::CMGT, dl, VT, LHS, RHS);
7241 case AArch64CC::LE:
7242 if (IsZero)
7243 return DAG.getNode(AArch64ISD::CMLEz, dl, VT, LHS);
7244 return DAG.getNode(AArch64ISD::CMGE, dl, VT, RHS, LHS);
7245 case AArch64CC::LS:
7246 return DAG.getNode(AArch64ISD::CMHS, dl, VT, RHS, LHS);
7247 case AArch64CC::LO:
7248 return DAG.getNode(AArch64ISD::CMHI, dl, VT, RHS, LHS);
7249 case AArch64CC::LT:
7250 if (IsZero)
7251 return DAG.getNode(AArch64ISD::CMLTz, dl, VT, LHS);
7252 return DAG.getNode(AArch64ISD::CMGT, dl, VT, RHS, LHS);
7253 case AArch64CC::HI:
7254 return DAG.getNode(AArch64ISD::CMHI, dl, VT, LHS, RHS);
7255 case AArch64CC::HS:
7256 return DAG.getNode(AArch64ISD::CMHS, dl, VT, LHS, RHS);
7257 }
7258}
7259
7260SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
7261 SelectionDAG &DAG) const {
7262 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
7263 SDValue LHS = Op.getOperand(0);
7264 SDValue RHS = Op.getOperand(1);
7265 EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
7266 SDLoc dl(Op);
7267
7268 if (LHS.getValueType().getVectorElementType().isInteger()) {
7269    assert(LHS.getValueType() == RHS.getValueType());
7270 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
7271 SDValue Cmp =
7272 EmitVectorComparison(LHS, RHS, AArch64CC, false, CmpVT, dl, DAG);
7273 return DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
7274 }
7275
7276 if (LHS.getValueType().getVectorElementType() == MVT::f16)
7277 return SDValue();
7278
7279  assert(LHS.getValueType().getVectorElementType() == MVT::f32 ||
7280         LHS.getValueType().getVectorElementType() == MVT::f64);
7281
7282 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
7283 // clean. Some of them require two branches to implement.
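  // When changeVectorFPCCToAArch64CC reports a second condition (CC2 != AL),
  // both comparisons are emitted and OR'd together below, and ShouldInvert
  // flips the final mask.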
7284 AArch64CC::CondCode CC1, CC2;
7285 bool ShouldInvert;
7286 changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
7287
7288 bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath;
7289 SDValue Cmp =
7290 EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
7291 if (!Cmp.getNode())
7292 return SDValue();
7293
7294 if (CC2 != AArch64CC::AL) {
7295 SDValue Cmp2 =
7296 EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
7297 if (!Cmp2.getNode())
7298 return SDValue();
7299
7300 Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
7301 }
7302
7303 Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
7304
7305 if (ShouldInvert)
7306 return Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
7307
7308 return Cmp;
7309}
7310
7311static SDValue getReductionSDNode(unsigned Op, SDLoc DL, SDValue ScalarOp,
7312 SelectionDAG &DAG) {
7313 SDValue VecOp = ScalarOp.getOperand(0);
7314 auto Rdx = DAG.getNode(Op, DL, VecOp.getSimpleValueType(), VecOp);
7315 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarOp.getValueType(), Rdx,
7316 DAG.getConstant(0, DL, MVT::i64));
7317}
7318
7319SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
7320 SelectionDAG &DAG) const {
7321 SDLoc dl(Op);
7322 switch (Op.getOpcode()) {
7323 case ISD::VECREDUCE_ADD:
7324 return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
7325 case ISD::VECREDUCE_SMAX:
7326 return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
7327 case ISD::VECREDUCE_SMIN:
7328 return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
7329 case ISD::VECREDUCE_UMAX:
7330 return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
7331 case ISD::VECREDUCE_UMIN:
7332 return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
7333 case ISD::VECREDUCE_FMAX: {
7334    assert(Op->getFlags().hasNoNaNs() && "fmax vector reduction needs NoNaN flag");
7335 return DAG.getNode(
7336 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
7337 DAG.getConstant(Intrinsic::aarch64_neon_fmaxnmv, dl, MVT::i32),
7338 Op.getOperand(0));
7339 }
7340 case ISD::VECREDUCE_FMIN: {
7341    assert(Op->getFlags().hasNoNaNs() && "fmin vector reduction needs NoNaN flag");
7342 return DAG.getNode(
7343 ISD::INTRINSIC_WO_CHAIN, dl, Op.getValueType(),
7344 DAG.getConstant(Intrinsic::aarch64_neon_fminnmv, dl, MVT::i32),
7345 Op.getOperand(0));
7346 }
7347 default:
7348    llvm_unreachable("Unhandled reduction");
7349 }
7350}
7351
7352/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
7353/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
7354/// specified in the intrinsic calls.
7355bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
7356 const CallInst &I,
7357 unsigned Intrinsic) const {
7358 auto &DL = I.getModule()->getDataLayout();
7359 switch (Intrinsic) {
7360 case Intrinsic::aarch64_neon_ld2:
7361 case Intrinsic::aarch64_neon_ld3:
7362 case Intrinsic::aarch64_neon_ld4:
7363 case Intrinsic::aarch64_neon_ld1x2:
7364 case Intrinsic::aarch64_neon_ld1x3:
7365 case Intrinsic::aarch64_neon_ld1x4:
7366 case Intrinsic::aarch64_neon_ld2lane:
7367 case Intrinsic::aarch64_neon_ld3lane:
7368 case Intrinsic::aarch64_neon_ld4lane:
7369 case Intrinsic::aarch64_neon_ld2r:
7370 case Intrinsic::aarch64_neon_ld3r:
7371 case Intrinsic::aarch64_neon_ld4r: {
7372 Info.opc = ISD::INTRINSIC_W_CHAIN;
7373 // Conservatively set memVT to the entire set of vectors loaded.
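    // For example, an aarch64.neon.ld3 returning three v4i32 values loads
    // 3 * 128 = 384 bits, reported here as v6i64.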
7374 uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64;
7375 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
7376 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
7377 Info.offset = 0;
7378 Info.align = 0;
7379 Info.vol = false; // volatile loads with NEON intrinsics not supported
7380 Info.readMem = true;
7381 Info.writeMem = false;
7382 return true;
7383 }
7384 case Intrinsic::aarch64_neon_st2:
7385 case Intrinsic::aarch64_neon_st3:
7386 case Intrinsic::aarch64_neon_st4:
7387 case Intrinsic::aarch64_neon_st1x2:
7388 case Intrinsic::aarch64_neon_st1x3:
7389 case Intrinsic::aarch64_neon_st1x4:
7390 case Intrinsic::aarch64_neon_st2lane:
7391 case Intrinsic::aarch64_neon_st3lane:
7392 case Intrinsic::aarch64_neon_st4lane: {
7393 Info.opc = ISD::INTRINSIC_VOID;
7394 // Conservatively set memVT to the entire set of vectors stored.
7395 unsigned NumElts = 0;
7396 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
7397 Type *ArgTy = I.getArgOperand(ArgI)->getType();
7398 if (!ArgTy->isVectorTy())
7399 break;
7400 NumElts += DL.getTypeSizeInBits(ArgTy) / 64;
7401 }
7402 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
7403 Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1);
7404 Info.offset = 0;
7405 Info.align = 0;
7406 Info.vol = false; // volatile stores with NEON intrinsics not supported
7407 Info.readMem = false;
7408 Info.writeMem = true;
7409 return true;
7410 }
7411 case Intrinsic::aarch64_ldaxr:
7412 case Intrinsic::aarch64_ldxr: {
7413 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
7414 Info.opc = ISD::INTRINSIC_W_CHAIN;
7415 Info.memVT = MVT::getVT(PtrTy->getElementType());
7416 Info.ptrVal = I.getArgOperand(0);
7417 Info.offset = 0;
7418 Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
7419 Info.vol = true;
7420 Info.readMem = true;
7421 Info.writeMem = false;
7422 return true;
7423 }
7424 case Intrinsic::aarch64_stlxr:
7425 case Intrinsic::aarch64_stxr: {
7426 PointerType *PtrTy = cast<PointerType>(I.getArgOperand(1)->getType());
7427 Info.opc = ISD::INTRINSIC_W_CHAIN;
7428 Info.memVT = MVT::getVT(PtrTy->getElementType());
7429 Info.ptrVal = I.getArgOperand(1);
7430 Info.offset = 0;
7431 Info.align = DL.getABITypeAlignment(PtrTy->getElementType());
7432 Info.vol = true;
7433 Info.readMem = false;
7434 Info.writeMem = true;
7435 return true;
7436 }
7437 case Intrinsic::aarch64_ldaxp:
7438 case Intrinsic::aarch64_ldxp:
7439 Info.opc = ISD::INTRINSIC_W_CHAIN;
7440 Info.memVT = MVT::i128;
7441 Info.ptrVal = I.getArgOperand(0);
7442 Info.offset = 0;
7443 Info.align = 16;
7444 Info.vol = true;
7445 Info.readMem = true;
7446 Info.writeMem = false;
7447 return true;
7448 case Intrinsic::aarch64_stlxp:
7449 case Intrinsic::aarch64_stxp:
7450 Info.opc = ISD::INTRINSIC_W_CHAIN;
7451 Info.memVT = MVT::i128;
7452 Info.ptrVal = I.getArgOperand(2);
7453 Info.offset = 0;
7454 Info.align = 16;
7455 Info.vol = true;
7456 Info.readMem = false;
7457 Info.writeMem = true;
7458 return true;
7459 default:
7460 break;
7461 }
7462
7463 return false;
7464}
7465
7466// Truncations from 64-bit GPR to 32-bit GPR are free.
7467bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
7468 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
7469 return false;
7470 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
7471 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
7472 return NumBits1 > NumBits2;
7473}
7474bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
7475 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
7476 return false;
7477 unsigned NumBits1 = VT1.getSizeInBits();
7478 unsigned NumBits2 = VT2.getSizeInBits();
7479 return NumBits1 > NumBits2;
7480}
7481
7482/// Check if it is profitable to hoist an instruction in then/else to if.
7483/// Not profitable if I and its user can form an FMA instruction
7484/// because we prefer FMSUB/FMADD.
7485bool AArch64TargetLowering::isProfitableToHoist(Instruction *I) const {
7486 if (I->getOpcode() != Instruction::FMul)
7487 return true;
7488
7489 if (!I->hasOneUse())
7490 return true;
7491
7492 Instruction *User = I->user_back();
7493
7494 if (User &&
7495 !(User->getOpcode() == Instruction::FSub ||
7496 User->getOpcode() == Instruction::FAdd))
7497 return true;
7498
7499 const TargetOptions &Options = getTargetMachine().Options;
7500 const DataLayout &DL = I->getModule()->getDataLayout();
7501 EVT VT = getValueType(DL, User->getOperand(0)->getType());
7502
7503 return !(isFMAFasterThanFMulAndFAdd(VT) &&
7504 isOperationLegalOrCustom(ISD::FMA, VT) &&
7505 (Options.AllowFPOpFusion == FPOpFusion::Fast ||
7506 Options.UnsafeFPMath));
7507}
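// Editor's sketch (illustrative, not part of the analyzed source): given IR like
//   %m = fmul double %a, %b      ; %m has exactly one use
//   %s = fsub double %c, %m
// with FMA legal for f64 and FP-op fusion allowed, the function above returns
// false: keeping the fmul next to its fsub user preserves the chance to form a
// single FMSUB/FMADD, which is why hoisting it is deemed unprofitable.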
7508
7509// All 32-bit GPR operations implicitly zero the high-half of the corresponding
7510// 64-bit GPR.
7511bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
7512 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
7513 return false;
7514 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
7515 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
7516 return NumBits1 == 32 && NumBits2 == 64;
7517}
7518bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
7519 if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger())
7520 return false;
7521 unsigned NumBits1 = VT1.getSizeInBits();
7522 unsigned NumBits2 = VT2.getSizeInBits();
7523 return NumBits1 == 32 && NumBits2 == 64;
7524}
7525
7526bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
7527 EVT VT1 = Val.getValueType();
7528 if (isZExtFree(VT1, VT2)) {
7529 return true;
7530 }
7531
7532 if (Val.getOpcode() != ISD::LOAD)
7533 return false;
7534
7535 // 8-, 16-, and 32-bit integer loads all implicitly zero-extend.
7536 return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() &&
7537 VT2.isSimple() && !VT2.isVector() && VT2.isInteger() &&
7538 VT1.getSizeInBits() <= 32);
7539}
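// Editor's sketch (illustrative, not part of the analyzed source): a few
// results of the zero-extension rules above:
//   isZExtFree(i32, i64)           -> true  (a 32-bit op already zeroes the
//                                            upper half of the X register)
//   isZExtFree(i16, i64)           -> false (needs an explicit UXTH)
//   isZExtFree(load of i16, i64)   -> true  (the SDValue overload: LDRH
//                                            zero-extends for free)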
7540
7541bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
7542 if (isa<FPExtInst>(Ext))
7543 return false;
7544
7545 // Vector types are not free.
7546 if (Ext->getType()->isVectorTy())
7547 return false;
7548
7549 for (const Use &U : Ext->uses()) {
7550 // The extension is free if we can fold it with a left shift in an
7551 // addressing mode or an arithmetic operation: add, sub, and cmp.
7552
7553 // Is there a shift?
7554 const Instruction *Instr = cast<Instruction>(U.getUser());
7555
7556 // Is this a constant shift?
7557 switch (Instr->getOpcode()) {
7558 case Instruction::Shl:
7559 if (!isa<ConstantInt>(Instr->getOperand(1)))
7560 return false;
7561 break;
7562 case Instruction::GetElementPtr: {
7563 gep_type_iterator GTI = gep_type_begin(Instr);
7564 auto &DL = Ext->getModule()->getDataLayout();
7565 std::advance(GTI, U.getOperandNo()-1);
7566 Type *IdxTy = GTI.getIndexedType();
7567 // This extension will end up with a shift because of the scaling factor.
7568 // 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
7569 // Get the shift amount based on the scaling factor:
7570 // log2(sizeof(IdxTy)) - log2(8).
7571 uint64_t ShiftAmt =
7572 countTrailingZeros(DL.getTypeStoreSizeInBits(IdxTy)) - 3;
7573 // Is the constant foldable in the shift of the addressing mode?
7574 // I.e., shift amount is between 1 and 4 inclusive.
7575 if (ShiftAmt == 0 || ShiftAmt > 4)
7576 return false;
7577 break;
7578 }
7579 case Instruction::Trunc:
7580 // Check if this is a noop.
7581 // trunc(sext ty1 to ty2) to ty1.
7582 if (Instr->getType() == Ext->getOperand(0)->getType())
7583 continue;
7584 LLVM_FALLTHROUGH;
7585 default:
7586 return false;
7587 }
7588
7589 // At this point we can use the bfm family, so this extension is free
7590 // for that use.
7591 }
7592 return true;
7593}
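// Editor's sketch (illustrative, not part of the analyzed source): a worked
// instance of the GEP case above. For
//   %idx = sext i32 %i to i64
//   %p   = getelementptr i32, i32* %base, i64 %idx
// the indexed type stores 32 bits, so
//   ShiftAmt = countTrailingZeros(32) - 3 = 5 - 3 = 2,
// which lies in the foldable [1, 4] range, so the sext is treated as free: it
// folds into the scaled register-offset addressing mode. For an i8 element the
// shift would be 0 and this path reports the extension as not free.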
7594
7595bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
7596 unsigned &RequiredAligment) const {
7597 if (!LoadedType.isSimple() ||
7598 (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
7599 return false;
7600 // Cyclone supports unaligned accesses.
7601 RequiredAligment = 0;
7602 unsigned NumBits = LoadedType.getSizeInBits();
7603 return NumBits == 32 || NumBits == 64;
7604}
7605
7606/// A helper function for determining the number of interleaved accesses we
7607/// will generate when lowering accesses of the given type.
7608unsigned
7609AArch64TargetLowering::getNumInterleavedAccesses(VectorType *VecTy,
7610 const DataLayout &DL) const {
7611 return (DL.getTypeSizeInBits(VecTy) + 127) / 128;
7612}
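// Editor's sketch (illustrative, not part of the analyzed source): the
// rounded-up division above counts 128-bit chunks, e.g.
//   <8 x i8>   ( 64 bits) -> ( 64 + 127) / 128 = 1 access
//   <4 x i32>  (128 bits) -> (128 + 127) / 128 = 1 access
//   <16 x i32> (512 bits) -> (512 + 127) / 128 = 4 accesses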
7613
7614MachineMemOperand::Flags
7615AArch64TargetLowering::getMMOFlags(const Instruction &I) const {
7616 if (Subtarget->getProcFamily() == AArch64Subtarget::Falkor &&
7617 I.getMetadata(FALKOR_STRIDED_ACCESS_MD) != nullptr)
7618 return MOStridedAccess;
7619 return MachineMemOperand::MONone;
7620}
7621
7622bool AArch64TargetLowering::isLegalInterleavedAccessType(
7623 VectorType *VecTy, const DataLayout &DL) const {
7624
7625 unsigned VecSize = DL.getTypeSizeInBits(VecTy);
7626 unsigned ElSize = DL.getTypeSizeInBits(VecTy->getElementType());
7627
7628 // Ensure the number of vector elements is greater than 1.
7629 if (VecTy->getNumElements() < 2)
7630 return false;
7631
7632 // Ensure the element type is legal.
7633 if (ElSize != 8 && ElSize != 16 && ElSize != 32 && ElSize != 64)
7634 return false;
7635
7636 // Ensure the total vector size is 64 or a multiple of 128. Types larger than
7637 // 128 will be split into multiple interleaved accesses.
7638 return VecSize == 64 || VecSize % 128 == 0;
7639}
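// Editor's sketch (illustrative, not part of the analyzed source): how the
// checks above classify a few vector types:
//   <8 x i8>   ( 64 bits) -> legal (one D register)
//   <4 x i32>  (128 bits) -> legal (one Q register)
//   <16 x i32> (512 bits) -> legal, later split into four 128-bit accesses
//   <3 x i32>  ( 96 bits) -> rejected: neither 64 bits nor a multiple of 128
//   <1 x i64>  ( 64 bits) -> rejected: fewer than two elements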
7640
7641/// \brief Lower an interleaved load into a ldN intrinsic.
7642///
7643/// E.g. Lower an interleaved load (Factor = 2):
7644/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
7645/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
7646/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
7647///
7648/// Into:
7649/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.aarch64.neon.ld2(%ptr)
7650/// %vec0 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 0
7651/// %vec1 = extractvalue { <4 x i32>, <4 x i32> } %ld2, 1
7652bool AArch64TargetLowering::lowerInterleavedLoad(
7653 LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
7654 ArrayRef<unsigned> Indices, unsigned Factor) const {
7655 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
7656 "Invalid interleave factor");
7657 assert(!Shuffles.empty() && "Empty shufflevector input");
7658 assert(Shuffles.size() == Indices.size() &&
7659 "Unmatched number of shufflevectors and indices");
7660
7661 const DataLayout &DL = LI->getModule()->getDataLayout();
7662
7663 VectorType *VecTy = Shuffles[0]->getType();
7664
7665 // Skip if we do not have NEON, and skip illegal vector types. We can
7666 // "legalize" wide vector types into multiple interleaved accesses as long as
7667 // their total size is a multiple of 128 bits.
7668 if (!Subtarget->hasNEON() || !isLegalInterleavedAccessType(VecTy, DL))
7669 return false;
7670
7671 unsigned NumLoads = getNumInterleavedAccesses(VecTy, DL);
7672
7673 // A pointer vector can not be the return type of the ldN intrinsics. Need to
7674 // load integer vectors first and then convert to pointer vectors.
7675 Type *EltTy = VecTy->getVectorElementType();
7676 if (EltTy->isPointerTy())
7677 VecTy =
7678 VectorType::get(DL.getIntPtrType(EltTy), VecTy->getVectorNumElements());
7679
7680 IRBuilder<> Builder(LI);
7681
7682 // The base address of the load.
7683 Value *BaseAddr = LI->getPointerOperand();
7684
7685 if (NumLoads > 1) {
7686 // If we're going to generate more than one load, reset the sub-vector type
7687 // to something legal.
7688 VecTy = VectorType::get(VecTy->getVectorElementType(),
7689 VecTy->getVectorNumElements() / NumLoads);
7690
7691 // We will compute the pointer operand of each load from the original base
7692 // address using GEPs. Cast the base address to a pointer to the scalar
7693 // element type.
7694 BaseAddr = Builder.CreateBitCast(
7695 BaseAddr, VecTy->getVectorElementType()->getPointerTo(
7696 LI->getPointerAddressSpace()));
7697 }
7698
7699 Type *PtrTy = VecTy->getPointerTo(LI->getPointerAddressSpace());
7700 Type *Tys[2] = {VecTy, PtrTy};
7701 static const Intrinsic::ID LoadInts[3] = {Intrinsic::aarch64_neon_ld2,
7702 Intrinsic::aarch64_neon_ld3,
7703 Intrinsic::aarch64_neon_ld4};
7704 Function *LdNFunc =
7705 Intrinsic::getDeclaration(LI->getModule(), LoadInts[Factor - 2], Tys);
7706
7707 // Holds sub-vectors extracted from the load intrinsic return values. The
7708 // sub-vectors are associated with the shufflevector instructions they will
7709 // replace.
7710 DenseMap<ShuffleVectorInst *, SmallVector<Value *, 4>> SubVecs;
7711
7712 for (unsigned LoadCount = 0; LoadCount < NumLoads; ++LoadCount) {
7713
7714 // If we're generating more than one load, compute the base address of
7715 // subsequent loads as an offset from the previous.
7716 if (LoadCount > 0)
7717 BaseAddr = Builder.CreateConstGEP1_32(
7718 BaseAddr, VecTy->getVectorNumElements() * Factor);
7719
7720 CallInst *LdN = Builder.CreateCall(
7721 LdNFunc, Builder.CreateBitCast(BaseAddr, PtrTy), "ldN");
7722
7723 // Extract and store the sub-vectors returned by the load intrinsic.
7724 for (unsigned i = 0; i < Shuffles.size(); i++) {
7725 ShuffleVectorInst *SVI = Shuffles[i];
7726 unsigned Index = Indices[i];
7727
7728 Value *SubVec = Builder.CreateExtractValue(LdN, Index);
7729
7730 // Convert the integer vector to pointer vector if the element is pointer.
7731 if (EltTy->isPointerTy())
7732 SubVec = Builder.CreateIntToPtr(
7733 SubVec, VectorType::get(SVI->getType()->getVectorElementType(),
7734 VecTy->getVectorNumElements()));
7735 SubVecs[SVI].push_back(SubVec);
7736 }
7737 }
7738
7739 // Replace uses of the shufflevector instructions with the sub-vectors
7740 // returned by the load intrinsic. If a shufflevector instruction is
7741 // associated with more than one sub-vector, those sub-vectors will be
7742 // concatenated into a single wide vector.
7743 for (ShuffleVectorInst *SVI : Shuffles) {
7744 auto &SubVec = SubVecs[SVI];
7745 auto *WideVec =
7746 SubVec.size() > 1 ? concatenateVectors(Builder, SubVec) : SubVec[0];
7747 SVI->replaceAllUsesWith(WideVec);
7748 }
7749
7750 return true;
7751}
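// Editor's sketch (illustrative, not part of the analyzed source): when the
// shuffles of a factor-2 interleaved load produce <8 x i32> (256 bits),
// NumLoads above is 2, so the working type is shrunk to <4 x i32> and two ld2
// calls are emitted, the second at a base address advanced by 4 * 2 = 8 i32
// elements; the two index-0 (resp. index-1) sub-vectors are then concatenated
// back into the <8 x i32> values that replace the original shufflevectors.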
7752
7753/// \brief Lower an interleaved store into a stN intrinsic.
7754///
7755/// E.g. Lower an interleaved store (Factor = 3):
7756/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
7757/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
7758/// store <12 x i32> %i.vec, <12 x i32>* %ptr
7759///
7760/// Into:
7761/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> %v1, <0, 1, 2, 3>
7762/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> %v1, <4, 5, 6, 7>
7763/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> %v1, <8, 9, 10, 11>
7764/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
7765///
7766/// Note that the new shufflevectors will be removed and we'll only generate one
7767/// st3 instruction in CodeGen.
7768///
7769/// Example for a more general valid mask (Factor 3). Lower:
7770/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
7771/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
7772/// store <12 x i32> %i.vec, <12 x i32>* %ptr
7773///
7774/// Into:
7775/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> %v1, <4, 5, 6, 7>
7776/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> %v1, <32, 33, 34, 35>
7777/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> %v1, <16, 17, 18, 19>
7778/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
7779bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
7780 ShuffleVectorInst *SVI,
7781 unsigned Factor) const {
7782 assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
7783 "Invalid interleave factor");