Bug Summary

File: lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 8399, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
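
For context, shifting a value by an amount greater than or equal to the width of its type is undefined behaviour in C++, so a 64-bit shift count must be guarded before it is used. Below is a minimal, hypothetical sketch (not taken from AArch64ISelLowering.cpp) of the bug class and one common guard; the helper name lowBitsMask is illustrative only.

#include <cstdint>

// Build a mask of the low 'Bits' bits of a 64-bit value.
// The naive form (1ULL << Bits) - 1 is undefined when Bits == 64,
// which is exactly the situation the analyzer flags above.
uint64_t lowBitsMask(unsigned Bits) {
  if (Bits >= 64)            // guard the shift count first
    return ~0ULL;            // all 64 bits set
  return (1ULL << Bits) - 1; // safe: Bits is now in [0, 63]
}

A typical fix for warnings like this is either such an explicit guard or an assertion that the shift amount is strictly less than 64 on every path the analyzer considers.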

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn329677/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn329677/build-llvm/lib/Target/AArch64 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-04-11-031539-24776-1 -x c++ /build/llvm-toolchain-snapshot-7~svn329677/lib/Target/AArch64/AArch64ISelLowering.cpp
1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AArch64TargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64ISelLowering.h"
15#include "AArch64CallingConvention.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGNodes.h"
44#include "llvm/CodeGen/TargetCallingConv.h"
45#include "llvm/CodeGen/TargetInstrInfo.h"
46#include "llvm/CodeGen/ValueTypes.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/GetElementPtrTypeIterator.h"
54#include "llvm/IR/GlobalValue.h"
55#include "llvm/IR/IRBuilder.h"
56#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Instructions.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/OperandTraits.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/Value.h"
64#include "llvm/MC/MCRegisterInfo.h"
65#include "llvm/Support/Casting.h"
66#include "llvm/Support/CodeGen.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/Debug.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Support/KnownBits.h"
72#include "llvm/Support/MachineValueType.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Target/TargetMachine.h"
76#include "llvm/Target/TargetOptions.h"
77#include <algorithm>
78#include <bitset>
79#include <cassert>
80#include <cctype>
81#include <cstdint>
82#include <cstdlib>
83#include <iterator>
84#include <limits>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
 91#define DEBUG_TYPE "aarch64-lower"
92
 93STATISTIC(NumTailCalls, "Number of tail calls");
 94STATISTIC(NumShiftInserts, "Number of vector shift inserts");
 95STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96
97static cl::opt<bool>
98EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99 cl::desc("Allow AArch64 SLI/SRI formation"),
100 cl::init(false));
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116/// Value type used for condition codes.
117static const MVT MVT_CC = MVT::i32;
118
119AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
120 const AArch64Subtarget &STI)
121 : TargetLowering(TM), Subtarget(&STI) {
122 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123 // we have to make something up. Arbitrarily, choose ZeroOrOne.
124 setBooleanContents(ZeroOrOneBooleanContent);
125 // When comparing vectors the result sets the different elements in the
126 // vector to all-one or all-zero.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // Set up the register classes.
130 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132
133 if (Subtarget->hasFPARMv8()) {
134 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138 }
139
140 if (Subtarget->hasNEON()) {
141 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143 // Someone set us up the NEON.
144 addDRTypeForNEON(MVT::v2f32);
145 addDRTypeForNEON(MVT::v8i8);
146 addDRTypeForNEON(MVT::v4i16);
147 addDRTypeForNEON(MVT::v2i32);
148 addDRTypeForNEON(MVT::v1i64);
149 addDRTypeForNEON(MVT::v1f64);
150 addDRTypeForNEON(MVT::v4f16);
151
152 addQRTypeForNEON(MVT::v4f32);
153 addQRTypeForNEON(MVT::v2f64);
154 addQRTypeForNEON(MVT::v16i8);
155 addQRTypeForNEON(MVT::v8i16);
156 addQRTypeForNEON(MVT::v4i32);
157 addQRTypeForNEON(MVT::v2i64);
158 addQRTypeForNEON(MVT::v8f16);
159 }
160
161 // Compute derived properties from the register classes
162 computeRegisterProperties(Subtarget->getRegisterInfo());
163
164 // Provide all sorts of operation actions
165 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
166 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
167 setOperationAction(ISD::SETCC, MVT::i32, Custom);
168 setOperationAction(ISD::SETCC, MVT::i64, Custom);
169 setOperationAction(ISD::SETCC, MVT::f16, Custom);
170 setOperationAction(ISD::SETCC, MVT::f32, Custom);
171 setOperationAction(ISD::SETCC, MVT::f64, Custom);
172 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
173 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
174 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
175 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
176 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
177 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
178 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
179 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
180 setOperationAction(ISD::SELECT, MVT::i32, Custom);
181 setOperationAction(ISD::SELECT, MVT::i64, Custom);
182 setOperationAction(ISD::SELECT, MVT::f16, Custom);
183 setOperationAction(ISD::SELECT, MVT::f32, Custom);
184 setOperationAction(ISD::SELECT, MVT::f64, Custom);
185 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
186 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
188 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
189 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
190 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
191 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
192
193 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
194 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
195 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
196
197 setOperationAction(ISD::FREM, MVT::f32, Expand);
198 setOperationAction(ISD::FREM, MVT::f64, Expand);
199 setOperationAction(ISD::FREM, MVT::f80, Expand);
200
201 // Custom lowering hooks are needed for XOR
202 // to fold it into CSINC/CSINV.
203 setOperationAction(ISD::XOR, MVT::i32, Custom);
204 setOperationAction(ISD::XOR, MVT::i64, Custom);
205
206 // Virtually no operation on f128 is legal, but LLVM can't expand them when
207 // there's a valid register class, so we need custom operations in most cases.
208 setOperationAction(ISD::FABS, MVT::f128, Expand);
209 setOperationAction(ISD::FADD, MVT::f128, Custom);
210 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
211 setOperationAction(ISD::FCOS, MVT::f128, Expand);
212 setOperationAction(ISD::FDIV, MVT::f128, Custom);
213 setOperationAction(ISD::FMA, MVT::f128, Expand);
214 setOperationAction(ISD::FMUL, MVT::f128, Custom);
215 setOperationAction(ISD::FNEG, MVT::f128, Expand);
216 setOperationAction(ISD::FPOW, MVT::f128, Expand);
217 setOperationAction(ISD::FREM, MVT::f128, Expand);
218 setOperationAction(ISD::FRINT, MVT::f128, Expand);
219 setOperationAction(ISD::FSIN, MVT::f128, Expand);
220 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
221 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
222 setOperationAction(ISD::FSUB, MVT::f128, Custom);
223 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
224 setOperationAction(ISD::SETCC, MVT::f128, Custom);
225 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
226 setOperationAction(ISD::SELECT, MVT::f128, Custom);
227 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
228 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
229
230 // Lowering for many of the conversions is actually specified by the non-f128
231 // type. The LowerXXX function will be trivial when f128 isn't involved.
232 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
233 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
234 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
235 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
236 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
237 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
238 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
239 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
241 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
242 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
243 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
244 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
245 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
246
247 // Variable arguments.
248 setOperationAction(ISD::VASTART, MVT::Other, Custom);
249 setOperationAction(ISD::VAARG, MVT::Other, Custom);
250 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
251 setOperationAction(ISD::VAEND, MVT::Other, Expand);
252
253 // Variable-sized objects.
254 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
255 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
256
257 if (Subtarget->isTargetWindows())
258 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
259 else
260 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
261
262 // Constant pool entries
263 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
264
265 // BlockAddress
266 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
267
268 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
269 setOperationAction(ISD::ADDC, MVT::i32, Custom);
270 setOperationAction(ISD::ADDE, MVT::i32, Custom);
271 setOperationAction(ISD::SUBC, MVT::i32, Custom);
272 setOperationAction(ISD::SUBE, MVT::i32, Custom);
273 setOperationAction(ISD::ADDC, MVT::i64, Custom);
274 setOperationAction(ISD::ADDE, MVT::i64, Custom);
275 setOperationAction(ISD::SUBC, MVT::i64, Custom);
276 setOperationAction(ISD::SUBE, MVT::i64, Custom);
277
278 // AArch64 lacks both left-rotate and popcount instructions.
279 setOperationAction(ISD::ROTL, MVT::i32, Expand);
280 setOperationAction(ISD::ROTL, MVT::i64, Expand);
281 for (MVT VT : MVT::vector_valuetypes()) {
282 setOperationAction(ISD::ROTL, VT, Expand);
283 setOperationAction(ISD::ROTR, VT, Expand);
284 }
285
286 // AArch64 doesn't have {U|S}MUL_LOHI.
287 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
288 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
289
290 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
291 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
292
293 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
294 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
295 for (MVT VT : MVT::vector_valuetypes()) {
296 setOperationAction(ISD::SDIVREM, VT, Expand);
297 setOperationAction(ISD::UDIVREM, VT, Expand);
298 }
299 setOperationAction(ISD::SREM, MVT::i32, Expand);
300 setOperationAction(ISD::SREM, MVT::i64, Expand);
301 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
302 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303 setOperationAction(ISD::UREM, MVT::i32, Expand);
304 setOperationAction(ISD::UREM, MVT::i64, Expand);
305
306 // Custom lower Add/Sub/Mul with overflow.
307 setOperationAction(ISD::SADDO, MVT::i32, Custom);
308 setOperationAction(ISD::SADDO, MVT::i64, Custom);
309 setOperationAction(ISD::UADDO, MVT::i32, Custom);
310 setOperationAction(ISD::UADDO, MVT::i64, Custom);
311 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
312 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
313 setOperationAction(ISD::USUBO, MVT::i32, Custom);
314 setOperationAction(ISD::USUBO, MVT::i64, Custom);
315 setOperationAction(ISD::SMULO, MVT::i32, Custom);
316 setOperationAction(ISD::SMULO, MVT::i64, Custom);
317 setOperationAction(ISD::UMULO, MVT::i32, Custom);
318 setOperationAction(ISD::UMULO, MVT::i64, Custom);
319
320 setOperationAction(ISD::FSIN, MVT::f32, Expand);
321 setOperationAction(ISD::FSIN, MVT::f64, Expand);
322 setOperationAction(ISD::FCOS, MVT::f32, Expand);
323 setOperationAction(ISD::FCOS, MVT::f64, Expand);
324 setOperationAction(ISD::FPOW, MVT::f32, Expand);
325 setOperationAction(ISD::FPOW, MVT::f64, Expand);
326 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
327 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
328 if (Subtarget->hasFullFP16())
329 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
330 else
331 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
332
333 setOperationAction(ISD::FREM, MVT::f16, Promote);
334 setOperationAction(ISD::FREM, MVT::v4f16, Promote);
335 setOperationAction(ISD::FREM, MVT::v8f16, Promote);
336 setOperationAction(ISD::FPOW, MVT::f16, Promote);
337 setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
338 setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
339 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
340 setOperationAction(ISD::FCOS, MVT::f16, Promote);
341 setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
342 setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
343 setOperationAction(ISD::FSIN, MVT::f16, Promote);
344 setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
345 setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
346 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
347 setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
348 setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
349 setOperationAction(ISD::FEXP, MVT::f16, Promote);
350 setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
351 setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
352 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
353 setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
354 setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
355 setOperationAction(ISD::FLOG, MVT::f16, Promote);
356 setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
357 setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
358 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
359 setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
360 setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
361 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
362 setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
363 setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
364
365 if (!Subtarget->hasFullFP16()) {
366 setOperationAction(ISD::SELECT, MVT::f16, Promote);
367 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
368 setOperationAction(ISD::SETCC, MVT::f16, Promote);
369 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
370 setOperationAction(ISD::FADD, MVT::f16, Promote);
371 setOperationAction(ISD::FSUB, MVT::f16, Promote);
372 setOperationAction(ISD::FMUL, MVT::f16, Promote);
373 setOperationAction(ISD::FDIV, MVT::f16, Promote);
374 setOperationAction(ISD::FMA, MVT::f16, Promote);
375 setOperationAction(ISD::FNEG, MVT::f16, Promote);
376 setOperationAction(ISD::FABS, MVT::f16, Promote);
377 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
378 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
379 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
380 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
381 setOperationAction(ISD::FRINT, MVT::f16, Promote);
382 setOperationAction(ISD::FROUND, MVT::f16, Promote);
383 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
384 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
385 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
386 setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
387 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
388
389 // promote v4f16 to v4f32 when that is known to be safe.
390 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
391 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
392 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
393 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
394 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
395 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
396 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
397 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
398 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
399 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
400 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
401 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
402
403 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
404 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
405 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
406 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
407 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
408 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
409 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
410 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
411 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
412 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
413 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
414 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
415 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
416 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
417 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
418
419 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
420 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
421 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
422 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
423 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
424 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
425 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
426 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
427 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
428 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
429 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
430 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
431 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
432 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
433 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
434 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
435 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
436 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
437 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
438 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
439 }
440
441 // AArch64 has implementations of a lot of rounding-like FP operations.
442 for (MVT Ty : {MVT::f32, MVT::f64}) {
443 setOperationAction(ISD::FFLOOR, Ty, Legal);
444 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
445 setOperationAction(ISD::FCEIL, Ty, Legal);
446 setOperationAction(ISD::FRINT, Ty, Legal);
447 setOperationAction(ISD::FTRUNC, Ty, Legal);
448 setOperationAction(ISD::FROUND, Ty, Legal);
449 setOperationAction(ISD::FMINNUM, Ty, Legal);
450 setOperationAction(ISD::FMAXNUM, Ty, Legal);
451 setOperationAction(ISD::FMINNAN, Ty, Legal);
452 setOperationAction(ISD::FMAXNAN, Ty, Legal);
453 }
454
455 if (Subtarget->hasFullFP16()) {
456 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
457 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
458 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
459 setOperationAction(ISD::FRINT, MVT::f16, Legal);
460 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
461 setOperationAction(ISD::FROUND, MVT::f16, Legal);
462 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
463 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
464 setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
465 setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
466 }
467
468 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
469
470 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
471 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
475
476 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
477 // This requires the Performance Monitors extension.
478 if (Subtarget->hasPerfMon())
479 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
480
481 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
482 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
483 // Issue __sincos_stret if available.
484 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
485 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
486 } else {
487 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
488 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
489 }
490
491 // Make floating-point constants legal for the large code model, so they don't
492 // become loads from the constant pool.
493 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
494 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
495 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
496 }
497
498 // AArch64 does not have floating-point extending loads, i1 sign-extending
499 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
500 for (MVT VT : MVT::fp_valuetypes()) {
501 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
502 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
503 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
504 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
505 }
506 for (MVT VT : MVT::integer_valuetypes())
507 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
508
509 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
510 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
511 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
512 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
513 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
514 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
515 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
516
517 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
518 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
519
520 // Indexed loads and stores are supported.
521 for (unsigned im = (unsigned)ISD::PRE_INC;
522 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
523 setIndexedLoadAction(im, MVT::i8, Legal);
524 setIndexedLoadAction(im, MVT::i16, Legal);
525 setIndexedLoadAction(im, MVT::i32, Legal);
526 setIndexedLoadAction(im, MVT::i64, Legal);
527 setIndexedLoadAction(im, MVT::f64, Legal);
528 setIndexedLoadAction(im, MVT::f32, Legal);
529 setIndexedLoadAction(im, MVT::f16, Legal);
530 setIndexedStoreAction(im, MVT::i8, Legal);
531 setIndexedStoreAction(im, MVT::i16, Legal);
532 setIndexedStoreAction(im, MVT::i32, Legal);
533 setIndexedStoreAction(im, MVT::i64, Legal);
534 setIndexedStoreAction(im, MVT::f64, Legal);
535 setIndexedStoreAction(im, MVT::f32, Legal);
536 setIndexedStoreAction(im, MVT::f16, Legal);
537 }
538
539 // Trap.
540 setOperationAction(ISD::TRAP, MVT::Other, Legal);
541
542 // We combine OR nodes for bitfield operations.
543 setTargetDAGCombine(ISD::OR);
544
545 // Vector add and sub nodes may conceal a high-half opportunity.
 546 // Also, try to fold ADD into CSINC/CSINV.
547 setTargetDAGCombine(ISD::ADD);
548 setTargetDAGCombine(ISD::SUB);
549 setTargetDAGCombine(ISD::SRL);
550 setTargetDAGCombine(ISD::XOR);
551 setTargetDAGCombine(ISD::SINT_TO_FP);
552 setTargetDAGCombine(ISD::UINT_TO_FP);
553
554 setTargetDAGCombine(ISD::FP_TO_SINT);
555 setTargetDAGCombine(ISD::FP_TO_UINT);
556 setTargetDAGCombine(ISD::FDIV);
557
558 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
559
560 setTargetDAGCombine(ISD::ANY_EXTEND);
561 setTargetDAGCombine(ISD::ZERO_EXTEND);
562 setTargetDAGCombine(ISD::SIGN_EXTEND);
563 setTargetDAGCombine(ISD::BITCAST);
564 setTargetDAGCombine(ISD::CONCAT_VECTORS);
565 setTargetDAGCombine(ISD::STORE);
566 if (Subtarget->supportsAddressTopByteIgnored())
567 setTargetDAGCombine(ISD::LOAD);
568
569 setTargetDAGCombine(ISD::MUL);
570
571 setTargetDAGCombine(ISD::SELECT);
572 setTargetDAGCombine(ISD::VSELECT);
573
574 setTargetDAGCombine(ISD::INTRINSIC_VOID);
575 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
576 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
577
578 MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
579 MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
580 MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
581
582 setStackPointerRegisterToSaveRestore(AArch64::SP);
583
584 setSchedulingPreference(Sched::Hybrid);
585
586 EnableExtLdPromotion = true;
587
588 // Set required alignment.
589 setMinFunctionAlignment(2);
590 // Set preferred alignments.
591 setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
592 setPrefLoopAlignment(STI.getPrefLoopAlignment());
593
594 // Only change the limit for entries in a jump table if specified by
595 // the subtarget, but not at the command line.
596 unsigned MaxJT = STI.getMaximumJumpTableSize();
597 if (MaxJT && getMaximumJumpTableSize() == 0)
598 setMaximumJumpTableSize(MaxJT);
599
600 setHasExtractBitsInsn(true);
601
602 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
603
604 if (Subtarget->hasNEON()) {
605 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
606 // silliness like this:
607 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
608 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
609 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
610 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
611 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
612 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
613 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
614 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
615 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
616 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
617 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
618 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
619 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
620 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
621 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
622 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
623 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
624 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
625 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
626 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
627 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
628 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
629 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
630 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
631 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
632
633 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
634 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
635 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
636 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
637 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
638
639 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
640
 641 // AArch64 doesn't have direct vector ->f32 conversion instructions for
642 // elements smaller than i32, so promote the input to i32 first.
643 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
644 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
645 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
646 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
647 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
648 // -> v8f16 conversions.
649 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
650 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
651 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
652 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
653 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
654 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
655 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
656 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
657 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
 658 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
659 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
660 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
661 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
662
663 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
664 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
665
666 setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
667 setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
668 setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
669 setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
670 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
671 setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
672 setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
673 setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
674
675 // AArch64 doesn't have MUL.2d:
676 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
677 // Custom handling for some quad-vector types to detect MULL.
678 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
679 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
680 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
681
682 // Vector reductions
683 for (MVT VT : MVT::integer_valuetypes()) {
684 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
685 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
686 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
687 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
688 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
689 }
690 for (MVT VT : MVT::fp_valuetypes()) {
691 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
692 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
693 }
694
695 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
696 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
697 // Likewise, narrowing and extending vector loads/stores aren't handled
698 // directly.
699 for (MVT VT : MVT::vector_valuetypes()) {
700 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
701
702 setOperationAction(ISD::MULHS, VT, Expand);
703 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
704 setOperationAction(ISD::MULHU, VT, Expand);
705 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
706
707 setOperationAction(ISD::BSWAP, VT, Expand);
708
709 for (MVT InnerVT : MVT::vector_valuetypes()) {
710 setTruncStoreAction(VT, InnerVT, Expand);
711 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
712 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
713 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
714 }
715 }
716
717 // AArch64 has implementations of a lot of rounding-like FP operations.
718 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
719 setOperationAction(ISD::FFLOOR, Ty, Legal);
720 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
721 setOperationAction(ISD::FCEIL, Ty, Legal);
722 setOperationAction(ISD::FRINT, Ty, Legal);
723 setOperationAction(ISD::FTRUNC, Ty, Legal);
724 setOperationAction(ISD::FROUND, Ty, Legal);
725 }
726 }
727
728 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
729}
730
731void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
 732  assert(VT.isVector() && "VT should be a vector type");
733
734 if (VT.isFloatingPoint()) {
735 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
736 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
737 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
738 }
739
740 // Mark vector float intrinsics as expand.
741 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
742 setOperationAction(ISD::FSIN, VT, Expand);
743 setOperationAction(ISD::FCOS, VT, Expand);
744 setOperationAction(ISD::FPOW, VT, Expand);
745 setOperationAction(ISD::FLOG, VT, Expand);
746 setOperationAction(ISD::FLOG2, VT, Expand);
747 setOperationAction(ISD::FLOG10, VT, Expand);
748 setOperationAction(ISD::FEXP, VT, Expand);
749 setOperationAction(ISD::FEXP2, VT, Expand);
750
751 // But we do support custom-lowering for FCOPYSIGN.
752 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
753 }
754
755 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
756 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
757 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
758 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
759 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
760 setOperationAction(ISD::SRA, VT, Custom);
761 setOperationAction(ISD::SRL, VT, Custom);
762 setOperationAction(ISD::SHL, VT, Custom);
763 setOperationAction(ISD::AND, VT, Custom);
764 setOperationAction(ISD::OR, VT, Custom);
765 setOperationAction(ISD::SETCC, VT, Custom);
766 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
767
768 setOperationAction(ISD::SELECT, VT, Expand);
769 setOperationAction(ISD::SELECT_CC, VT, Expand);
770 setOperationAction(ISD::VSELECT, VT, Expand);
771 for (MVT InnerVT : MVT::all_valuetypes())
772 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
773
774 // CNT supports only B element sizes.
775 if (VT != MVT::v8i8 && VT != MVT::v16i8)
776 setOperationAction(ISD::CTPOP, VT, Expand);
777
778 setOperationAction(ISD::UDIV, VT, Expand);
779 setOperationAction(ISD::SDIV, VT, Expand);
780 setOperationAction(ISD::UREM, VT, Expand);
781 setOperationAction(ISD::SREM, VT, Expand);
782 setOperationAction(ISD::FREM, VT, Expand);
783
784 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
785 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
786
787 if (!VT.isFloatingPoint())
788 setOperationAction(ISD::ABS, VT, Legal);
789
790 // [SU][MIN|MAX] are available for all NEON types apart from i64.
791 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
792 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
793 setOperationAction(Opcode, VT, Legal);
794
795 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
796 if (VT.isFloatingPoint() &&
797 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
798 for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
799 ISD::FMINNUM, ISD::FMAXNUM})
800 setOperationAction(Opcode, VT, Legal);
801
802 if (Subtarget->isLittleEndian()) {
803 for (unsigned im = (unsigned)ISD::PRE_INC;
804 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
805 setIndexedLoadAction(im, VT, Legal);
806 setIndexedStoreAction(im, VT, Legal);
807 }
808 }
809}
810
811void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
812 addRegisterClass(VT, &AArch64::FPR64RegClass);
813 addTypeForNEON(VT, MVT::v2i32);
814}
815
816void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
817 addRegisterClass(VT, &AArch64::FPR128RegClass);
818 addTypeForNEON(VT, MVT::v4i32);
819}
820
821EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
822 EVT VT) const {
823 if (!VT.isVector())
824 return MVT::i32;
825 return VT.changeVectorElementTypeToInteger();
826}
827
828static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
829 const APInt &Demanded,
830 TargetLowering::TargetLoweringOpt &TLO,
831 unsigned NewOpc) {
832 uint64_t OldImm = Imm, NewImm, Enc;
833 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
834
835 // Return if the immediate is already all zeros, all ones, a bimm32 or a
836 // bimm64.
837 if (Imm == 0 || Imm == Mask ||
838 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
839 return false;
840
841 unsigned EltSize = Size;
842 uint64_t DemandedBits = Demanded.getZExtValue();
843
844 // Clear bits that are not demanded.
845 Imm &= DemandedBits;
846
847 while (true) {
848 // The goal here is to set the non-demanded bits in a way that minimizes
849 // the number of switching between 0 and 1. In order to achieve this goal,
850 // we set the non-demanded bits to the value of the preceding demanded bits.
851 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
852 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
853 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
854 // The final result is 0b11000011.
855 uint64_t NonDemandedBits = ~DemandedBits;
856 uint64_t InvertedImm = ~Imm & DemandedBits;
857 uint64_t RotatedImm =
858 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
859 NonDemandedBits;
860 uint64_t Sum = RotatedImm + NonDemandedBits;
861 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
862 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
863 NewImm = (Imm | Ones) & Mask;
864
865 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
866 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
867 // we halve the element size and continue the search.
868 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
869 break;
870
871 // We cannot shrink the element size any further if it is 2-bits.
872 if (EltSize == 2)
873 return false;
874
875 EltSize /= 2;
876 Mask >>= EltSize;
877 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
878
879 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
880 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
881 return false;
882
883 // Merge the upper and lower halves of Imm and DemandedBits.
884 Imm |= Hi;
885 DemandedBits |= DemandedBitsHi;
886 }
887
888 ++NumOptimizedImms;
889
890 // Replicate the element across the register width.
891 while (EltSize < Size) {
892 NewImm |= NewImm << EltSize;
893 EltSize *= 2;
894 }
895
896 (void)OldImm;
 897  assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
 898         "demanded bits should never be altered");
 899  assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
900
901 // Create the new constant immediate node.
902 EVT VT = Op.getValueType();
903 SDLoc DL(Op);
904 SDValue New;
905
906 // If the new constant immediate is all-zeros or all-ones, let the target
907 // independent DAG combine optimize this node.
908 if (NewImm == 0 || NewImm == OrigMask) {
909 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
910 TLO.DAG.getConstant(NewImm, DL, VT));
911 // Otherwise, create a machine node so that target independent DAG combine
912 // doesn't undo this optimization.
913 } else {
914 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
915 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
916 New = SDValue(
917 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
918 }
919
920 return TLO.CombineTo(Op, New);
921}
922
923bool AArch64TargetLowering::targetShrinkDemandedConstant(
924 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
925 // Delay this optimization to as late as possible.
926 if (!TLO.LegalOps)
927 return false;
928
929 if (!EnableOptimizeLogicalImm)
930 return false;
931
932 EVT VT = Op.getValueType();
933 if (VT.isVector())
934 return false;
935
936 unsigned Size = VT.getSizeInBits();
 937  assert((Size == 32 || Size == 64) &&
 938         "i32 or i64 is expected after legalization.");
939
940 // Exit early if we demand all bits.
941 if (Demanded.countPopulation() == Size)
942 return false;
943
944 unsigned NewOpc;
945 switch (Op.getOpcode()) {
946 default:
947 return false;
948 case ISD::AND:
949 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
950 break;
951 case ISD::OR:
952 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
953 break;
954 case ISD::XOR:
955 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
956 break;
957 }
958 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
959 if (!C)
960 return false;
961 uint64_t Imm = C->getZExtValue();
962 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
963}
964
965/// computeKnownBitsForTargetNode - Determine which of the bits specified in
 966/// Mask are known to be either zero or one and return them in Known.
967void AArch64TargetLowering::computeKnownBitsForTargetNode(
968 const SDValue Op, KnownBits &Known,
969 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
970 switch (Op.getOpcode()) {
971 default:
972 break;
973 case AArch64ISD::CSEL: {
974 KnownBits Known2;
975 DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
976 DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
977 Known.Zero &= Known2.Zero;
978 Known.One &= Known2.One;
979 break;
980 }
981 case ISD::INTRINSIC_W_CHAIN: {
982 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
983 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
984 switch (IntID) {
985 default: return;
986 case Intrinsic::aarch64_ldaxr:
987 case Intrinsic::aarch64_ldxr: {
988 unsigned BitWidth = Known.getBitWidth();
989 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
990 unsigned MemBits = VT.getScalarSizeInBits();
991 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
992 return;
993 }
994 }
995 break;
996 }
997 case ISD::INTRINSIC_WO_CHAIN:
998 case ISD::INTRINSIC_VOID: {
999 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1000 switch (IntNo) {
1001 default:
1002 break;
1003 case Intrinsic::aarch64_neon_umaxv:
1004 case Intrinsic::aarch64_neon_uminv: {
1005 // Figure out the datatype of the vector operand. The UMINV instruction
1006 // will zero extend the result, so we can mark as known zero all the
1007      // bits larger than the element datatype. 32-bit or larger doesn't need
1008 // this as those are legal types and will be handled by isel directly.
1009 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1010 unsigned BitWidth = Known.getBitWidth();
1011 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1012        assert(BitWidth >= 8 && "Unexpected width!");
1013 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1014 Known.Zero |= Mask;
1015 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1016        assert(BitWidth >= 16 && "Unexpected width!");
1017 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1018 Known.Zero |= Mask;
1019 }
1020 break;
1021 } break;
1022 }
1023 }
1024 }
1025}
1026
1027MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1028 EVT) const {
1029 return MVT::i64;
1030}
1031
1032bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1033 unsigned AddrSpace,
1034 unsigned Align,
1035 bool *Fast) const {
1036 if (Subtarget->requiresStrictAlign())
1037 return false;
1038
1039 if (Fast) {
1040 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1041 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1042 // See comments in performSTORECombine() for more details about
1043 // these conditions.
1044
1045 // Code that uses clang vector extensions can mark that it
1046 // wants unaligned accesses to be treated as fast by
1047 // underspecifying alignment to be 1 or 2.
1048 Align <= 2 ||
1049
1050 // Disregard v2i64. Memcpy lowering produces those and splitting
1051 // them regresses performance on micro-benchmarks and olden/bh.
1052 VT == MVT::v2i64;
1053 }
1054 return true;
1055}
1056
1057FastISel *
1058AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1059 const TargetLibraryInfo *libInfo) const {
1060 return AArch64::createFastISel(funcInfo, libInfo);
1061}
1062
1063const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1064 switch ((AArch64ISD::NodeType)Opcode) {
1065 case AArch64ISD::FIRST_NUMBER: break;
1066 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1067 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1068 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1069 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1070 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1071 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1072 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1073 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1074 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1075 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1076 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1077 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1078 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1079 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1080 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1081 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1082 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1083 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1084 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1085 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1086 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1087 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1088 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1089 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1090 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1091 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1092 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1093 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1094 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1095 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1096 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1097 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1098 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1099 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1100 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1101 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1102 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1103 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1104 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1105 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1106 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1107 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1108 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1109 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1110 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1111 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1112 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1113 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1114 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1115 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1116 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1117 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1118 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1119 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1120 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1121 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1122 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1123 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1124 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1125 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1126 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1127 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1128 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1129 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1130 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1131 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1132 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1133 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1134 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1135 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1136 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1137 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1138 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1139 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1140 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1141 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1142 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1143 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1144 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1145 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1146 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1147 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1148 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1149 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1150 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1151 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1152 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1153 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1154 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1155 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1156 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1157 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1158 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1159 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1160 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1161 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1162 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1163 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1164 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1165 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1166 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1167 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1168 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1169 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1170 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1171 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1172 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1173 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1174 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1175 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1176 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1177 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1178 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1179 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1180 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1181 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1182 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1183 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1184 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1185 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1186 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1187 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1188 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1189 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1190 }
1191 return nullptr;
1192}
1193
1194MachineBasicBlock *
1195AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1196 MachineBasicBlock *MBB) const {
1197 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1198 // phi node:
1199
1200 // OrigBB:
1201 // [... previous instrs leading to comparison ...]
1202 // b.ne TrueBB
1203 // b EndBB
1204 // TrueBB:
1205 // ; Fallthrough
1206 // EndBB:
1207 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1208
1209 MachineFunction *MF = MBB->getParent();
1210 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1211 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1212 DebugLoc DL = MI.getDebugLoc();
1213 MachineFunction::iterator It = ++MBB->getIterator();
1214
1215 unsigned DestReg = MI.getOperand(0).getReg();
1216 unsigned IfTrueReg = MI.getOperand(1).getReg();
1217 unsigned IfFalseReg = MI.getOperand(2).getReg();
1218 unsigned CondCode = MI.getOperand(3).getImm();
1219 bool NZCVKilled = MI.getOperand(4).isKill();
1220
1221 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1222 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1223 MF->insert(It, TrueBB);
1224 MF->insert(It, EndBB);
1225
1226 // Transfer rest of current basic-block to EndBB
1227 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1228 MBB->end());
1229 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1230
1231 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1232 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1233 MBB->addSuccessor(TrueBB);
1234 MBB->addSuccessor(EndBB);
1235
1236 // TrueBB falls through to the end.
1237 TrueBB->addSuccessor(EndBB);
1238
1239 if (!NZCVKilled) {
1240 TrueBB->addLiveIn(AArch64::NZCV);
1241 EndBB->addLiveIn(AArch64::NZCV);
1242 }
1243
1244 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1245 .addReg(IfTrueReg)
1246 .addMBB(TrueBB)
1247 .addReg(IfFalseReg)
1248 .addMBB(MBB);
1249
1250 MI.eraseFromParent();
1251 return EndBB;
1252}
1253
1254MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1255 MachineInstr &MI, MachineBasicBlock *BB) const {
1256 switch (MI.getOpcode()) {
1257 default:
1258#ifndef NDEBUG
1259 MI.dump();
1260#endif
1261 llvm_unreachable("Unexpected instruction for custom inserter!");
1262
1263 case AArch64::F128CSEL:
1264 return EmitF128CSEL(MI, BB);
1265
1266 case TargetOpcode::STACKMAP:
1267 case TargetOpcode::PATCHPOINT:
1268 return emitPatchPoint(MI, BB);
1269 }
1270}
1271
1272//===----------------------------------------------------------------------===//
1273// AArch64 Lowering private implementation.
1274//===----------------------------------------------------------------------===//
1275
1276//===----------------------------------------------------------------------===//
1277// Lowering Code
1278//===----------------------------------------------------------------------===//
1279
1280/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1281/// CC
1282static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1283 switch (CC) {
1284 default:
1285 llvm_unreachable("Unknown condition code!");
1286 case ISD::SETNE:
1287 return AArch64CC::NE;
1288 case ISD::SETEQ:
1289 return AArch64CC::EQ;
1290 case ISD::SETGT:
1291 return AArch64CC::GT;
1292 case ISD::SETGE:
1293 return AArch64CC::GE;
1294 case ISD::SETLT:
1295 return AArch64CC::LT;
1296 case ISD::SETLE:
1297 return AArch64CC::LE;
1298 case ISD::SETUGT:
1299 return AArch64CC::HI;
1300 case ISD::SETUGE:
1301 return AArch64CC::HS;
1302 case ISD::SETULT:
1303 return AArch64CC::LO;
1304 case ISD::SETULE:
1305 return AArch64CC::LS;
1306 }
1307}
1308
1309/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1310static void changeFPCCToAArch64CC(ISD::CondCode CC,
1311 AArch64CC::CondCode &CondCode,
1312 AArch64CC::CondCode &CondCode2) {
1313 CondCode2 = AArch64CC::AL;
1314 switch (CC) {
1315 default:
1316 llvm_unreachable("Unknown FP condition!");
1317 case ISD::SETEQ:
1318 case ISD::SETOEQ:
1319 CondCode = AArch64CC::EQ;
1320 break;
1321 case ISD::SETGT:
1322 case ISD::SETOGT:
1323 CondCode = AArch64CC::GT;
1324 break;
1325 case ISD::SETGE:
1326 case ISD::SETOGE:
1327 CondCode = AArch64CC::GE;
1328 break;
1329 case ISD::SETOLT:
1330 CondCode = AArch64CC::MI;
1331 break;
1332 case ISD::SETOLE:
1333 CondCode = AArch64CC::LS;
1334 break;
1335 case ISD::SETONE:
1336 CondCode = AArch64CC::MI;
1337 CondCode2 = AArch64CC::GT;
1338 break;
1339 case ISD::SETO:
1340 CondCode = AArch64CC::VC;
1341 break;
1342 case ISD::SETUO:
1343 CondCode = AArch64CC::VS;
1344 break;
1345 case ISD::SETUEQ:
1346 CondCode = AArch64CC::EQ;
1347 CondCode2 = AArch64CC::VS;
1348 break;
1349 case ISD::SETUGT:
1350 CondCode = AArch64CC::HI;
1351 break;
1352 case ISD::SETUGE:
1353 CondCode = AArch64CC::PL;
1354 break;
1355 case ISD::SETLT:
1356 case ISD::SETULT:
1357 CondCode = AArch64CC::LT;
1358 break;
1359 case ISD::SETLE:
1360 case ISD::SETULE:
1361 CondCode = AArch64CC::LE;
1362 break;
1363 case ISD::SETNE:
1364 case ISD::SETUNE:
1365 CondCode = AArch64CC::NE;
1366 break;
1367 }
1368}
1369
1370/// Convert a DAG fp condition code to an AArch64 CC.
1371/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1372/// should be AND'ed instead of OR'ed.
1373static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1374 AArch64CC::CondCode &CondCode,
1375 AArch64CC::CondCode &CondCode2) {
1376 CondCode2 = AArch64CC::AL;
1377 switch (CC) {
1378 default:
1379 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1380 assert(CondCode2 == AArch64CC::AL);
1381 break;
1382 case ISD::SETONE:
1383 // (a one b)
1384 // == ((a olt b) || (a ogt b))
1385 // == ((a ord b) && (a une b))
1386 CondCode = AArch64CC::VC;
1387 CondCode2 = AArch64CC::NE;
1388 break;
1389 case ISD::SETUEQ:
1390 // (a ueq b)
1391 // == ((a uno b) || (a oeq b))
1392 // == ((a ule b) && (a uge b))
1393 CondCode = AArch64CC::PL;
1394 CondCode2 = AArch64CC::LE;
1395 break;
1396 }
1397}
1398
1399/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1400/// CC usable with the vector instructions. Fewer operations are available
1401/// without a real NZCV register, so we have to use less efficient combinations
1402/// to get the same effect.
1403static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1404 AArch64CC::CondCode &CondCode,
1405 AArch64CC::CondCode &CondCode2,
1406 bool &Invert) {
1407 Invert = false;
1408 switch (CC) {
1409 default:
1410 // Mostly the scalar mappings work fine.
1411 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1412 break;
1413 case ISD::SETUO:
1414 Invert = true;
1415 LLVM_FALLTHROUGH;
1416 case ISD::SETO:
1417 CondCode = AArch64CC::MI;
1418 CondCode2 = AArch64CC::GE;
1419 break;
1420 case ISD::SETUEQ:
1421 case ISD::SETULT:
1422 case ISD::SETULE:
1423 case ISD::SETUGT:
1424 case ISD::SETUGE:
1425 // All of the compare-mask comparisons are ordered, but we can switch
1426 // between the two by a double inversion. E.g. ULE == !OGT.
1427 Invert = true;
1428 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1429 break;
1430 }
1431}
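// For example, a vector SETULE compare has no direct compare-mask encoding;
// the unordered cases above set Invert and map its inverse (SETOGT) to
// AArch64CC::GT, so the caller emits the ordered compare and then negates
// the resulting mask.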
1432
1433static bool isLegalArithImmed(uint64_t C) {
1434 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1435 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1436 DEBUG(dbgs() << "Is imm " << C << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1437 return IsLegal;
1438}
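// Worked examples for the check above: values 0..4095 pass "C >> 12 == 0",
// and multiples of 4096 up to 0xFFF000 pass the second clause, so 4095,
// 4096 and 0xFFF000 are legal arithmetic immediates while 4097 is not.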
1439
1440static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1441 const SDLoc &dl, SelectionDAG &DAG) {
1442 EVT VT = LHS.getValueType();
1443 const bool FullFP16 =
1444 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1445
1446 if (VT.isFloatingPoint()) {
1447 assert(VT != MVT::f128);
1448 if (VT == MVT::f16 && !FullFP16) {
1449 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1450 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1451 VT = MVT::f32;
1452 }
1453 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1454 }
1455
1456 // The CMP instruction is just an alias for SUBS, and representing it as
1457 // SUBS means that it's possible to get CSE with subtract operations.
1458 // A later phase can perform the optimization of setting the destination
1459 // register to WZR/XZR if it ends up being unused.
1460 unsigned Opcode = AArch64ISD::SUBS;
1461
1462 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
1463 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1464 // We'd like to combine a (CMP op1, (sub 0, op2) into a CMN instruction on
1465 // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1466 // can be set differently by this operation. It comes down to whether
1467 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1468 // everything is fine. If not then the optimization is wrong. Thus general
1469 // comparisons are only valid if op2 != 0.
1470
1471 // So, finally, the only LLVM-native comparisons that don't mention C and V
1472 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1473 // the absence of information about op2.
1474 Opcode = AArch64ISD::ADDS;
1475 RHS = RHS.getOperand(1);
1476 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1477 !isUnsignedIntSetCC(CC)) {
1478 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1479 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1480 // of the signed comparisons.
1481 Opcode = AArch64ISD::ANDS;
1482 RHS = LHS.getOperand(1);
1483 LHS = LHS.getOperand(0);
1484 }
1485
1486 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1487 .getValue(1);
1488}
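// For instance, "(seteq a, (sub 0, b))" takes the ADDS path above and can be
// selected as "cmn a, b", while "(setlt (and a, b), 0)" takes the ANDS path
// and can be selected as "tst a, b".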
1489
1490/// \defgroup AArch64CCMP CMP;CCMP matching
1491///
1492/// These functions deal with the formation of CMP;CCMP;... sequences.
1493/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1494/// a comparison. They set the NZCV flags to a predefined value if their
1495/// predicate is false. This allows us to express arbitrary conjunctions, for
1496/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))"
1497/// expressed as:
1498/// cmp A
1499/// ccmp B, inv(CB), CA
1500/// check for CB flags
1501///
1502/// In general we can create code for arbitrary "... (and (and A B) C)"
1503/// sequences. We can also implement some "or" expressions, because "(or A B)"
1504/// is equivalent to "not (and (not A) (not B))" and we can implement some
1505/// negation operations:
1506/// We can negate the results of a single comparison by inverting the flags
1507/// used when the predicate fails and inverting the flags tested in the next
1508/// instruction; We can also negate the results of the whole previous
1509/// conditional compare sequence by inverting the flags tested in the next
1510/// instruction. However there is no way to negate the result of a partial
1511/// sequence.
1512///
1513/// Therefore on encountering an "or" expression we can negate the subtree on
1514/// one side and have to be able to push the negate to the leaves of the subtree
1515/// on the other side (see also the comments in code). As complete example:
1516/// "or (or (setCA (cmp A)) (setCB (cmp B)))
1517/// (and (setCC (cmp C)) (setCD (cmp D)))"
1518/// is transformed to
1519/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1520/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1521/// and implemented as:
1522/// cmp C
1523/// ccmp D, inv(CD), CC
1524/// ccmp A, CA, inv(CD)
1525/// ccmp B, CB, inv(CA)
1526/// check for CB flags
1527/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1528/// by conditional compare sequences.
1529/// @{
1530
1531/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1532static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1533 ISD::CondCode CC, SDValue CCOp,
1534 AArch64CC::CondCode Predicate,
1535 AArch64CC::CondCode OutCC,
1536 const SDLoc &DL, SelectionDAG &DAG) {
1537 unsigned Opcode = 0;
1538 const bool FullFP16 =
1539 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1540
1541 if (LHS.getValueType().isFloatingPoint()) {
1542 assert(LHS.getValueType() != MVT::f128);
1543 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1544 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1545 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1546 }
1547 Opcode = AArch64ISD::FCCMP;
1548 } else if (RHS.getOpcode() == ISD::SUB) {
1549 SDValue SubOp0 = RHS.getOperand(0);
1550 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1551 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1552 Opcode = AArch64ISD::CCMN;
1553 RHS = RHS.getOperand(1);
1554 }
1555 }
1556 if (Opcode == 0)
1557 Opcode = AArch64ISD::CCMP;
1558
1559 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1560 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1561 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1562 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1563 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1564}
1565
1566/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1567/// CanNegate is set to true if we can push a negate operation through
1568/// the tree in a way that we are left with AND operations and negate operations
1569/// at the leaves only. i.e. "not (or (or x y) z)" can be changed to
1570/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1571/// brought into such a form.
1572static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
1573 unsigned Depth = 0) {
1574 if (!Val.hasOneUse())
1575 return false;
1576 unsigned Opcode = Val->getOpcode();
1577 if (Opcode == ISD::SETCC) {
1578 if (Val->getOperand(0).getValueType() == MVT::f128)
1579 return false;
1580 CanNegate = true;
1581 return true;
1582 }
1583 // Protect against exponential runtime and stack overflow.
1584 if (Depth > 6)
1585 return false;
1586 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1587 SDValue O0 = Val->getOperand(0);
1588 SDValue O1 = Val->getOperand(1);
1589 bool CanNegateL;
1590 if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1))
1591 return false;
1592 bool CanNegateR;
1593 if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1))
1594 return false;
1595
1596 if (Opcode == ISD::OR) {
1597 // For an OR expression we need to be able to negate at least one side or
1598 // we cannot do the transformation at all.
1599 if (!CanNegateL && !CanNegateR)
1600 return false;
1601 // We can however change a (not (or x y)) to (and (not x) (not y)) if we
1602 // can negate the x and y subtrees.
1603 CanNegate = CanNegateL && CanNegateR;
1604 } else {
1605 // If the operands are OR expressions then we finally need to negate their
1606 // outputs; we can only do that for the operand emitted last, by
1607 // negating OutCC, not for both operands.
1608 bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1609 bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1610 if (NeedsNegOutL && NeedsNegOutR)
1611 return false;
1612 // We cannot negate an AND operation (it would become an OR),
1613 CanNegate = false;
1614 }
1615 return true;
1616 }
1617 return false;
1618}
1619
1620/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1621/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
1622/// Tries to transform the given i1 producing node @p Val to a series of compare
1623/// and conditional compare operations. @returns an NZCV flags producing node
1624/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1625/// transformation was not possible.
1626/// On recursive invocations @p Negate may be set to true to have negation
1627/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
1628/// for the comparisons in the current subtree; @p Depth limits the search
1629/// depth to avoid stack overflow.
1630static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val,
1631 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1632 AArch64CC::CondCode Predicate) {
1633 // We're at a tree leaf, produce a conditional comparison operation.
1634 unsigned Opcode = Val->getOpcode();
1635 if (Opcode == ISD::SETCC) {
1636 SDValue LHS = Val->getOperand(0);
1637 SDValue RHS = Val->getOperand(1);
1638 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1639 bool isInteger = LHS.getValueType().isInteger();
1640 if (Negate)
1641 CC = getSetCCInverse(CC, isInteger);
1642 SDLoc DL(Val);
1643 // Determine OutCC and handle FP special case.
1644 if (isInteger) {
1645 OutCC = changeIntCCToAArch64CC(CC);
1646 } else {
1647 assert(LHS.getValueType().isFloatingPoint());
1648 AArch64CC::CondCode ExtraCC;
1649 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1650 // Some floating point conditions can't be tested with a single condition
1651 // code. Construct an additional comparison in this case.
1652 if (ExtraCC != AArch64CC::AL) {
1653 SDValue ExtraCmp;
1654 if (!CCOp.getNode())
1655 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1656 else
1657 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1658 ExtraCC, DL, DAG);
1659 CCOp = ExtraCmp;
1660 Predicate = ExtraCC;
1661 }
1662 }
1663
1664 // Produce a normal comparison if we are first in the chain
1665 if (!CCOp)
1666 return emitComparison(LHS, RHS, CC, DL, DAG);
1667 // Otherwise produce a ccmp.
1668 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1669 DAG);
1670 }
1671 assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1672        "Valid conjunction/disjunction tree");
1673
1674 // Check if both sides can be transformed.
1675 SDValue LHS = Val->getOperand(0);
1676 SDValue RHS = Val->getOperand(1);
1677
1678 // In case of an OR we need to negate our operands and the result.
1679 // (A v B) <=> not(not(A) ^ not(B))
1680 bool NegateOpsAndResult = Opcode == ISD::OR;
1681 // We can negate the results of all previous operations by inverting the
1682 // predicate flags giving us a free negation for one side. The other side
1683 // must be negatable by itself.
1684 if (NegateOpsAndResult) {
1685 // See which side we can negate.
1686 bool CanNegateL;
1687 bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
1688 assert(isValidL && "Valid conjunction/disjunction tree");
1689 (void)isValidL;
1690
1691#ifndef NDEBUG
1692 bool CanNegateR;
1693 bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
1694 assert(isValidR && "Valid conjunction/disjunction tree");
1695 assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1696#endif
1697
1698 // Order the side which we cannot negate to RHS so we can emit it first.
1699 if (!CanNegateL)
1700 std::swap(LHS, RHS);
1701 } else {
1702 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1703 assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1704        "Valid conjunction/disjunction tree");
1705 // Order the side where we need to negate the output flags to RHS so it
1706 // gets emitted first.
1707 if (NeedsNegOutL)
1708 std::swap(LHS, RHS);
1709 }
1710
1711 // Emit RHS. If we want to negate the tree we only need to push a negate
1712 // through if we are already in a PushNegate case, otherwise we can negate
1713 // the "flags to test" afterwards.
1714 AArch64CC::CondCode RHSCC;
1715 SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
1716 CCOp, Predicate);
1717 if (NegateOpsAndResult && !Negate)
1718 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1719 // Emit LHS. We may need to negate it.
1720 SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
1721 NegateOpsAndResult, CmpR,
1722 RHSCC);
1723 // If we transformed an OR to an AND then we have to negate the result
1724 // (or absorb the Negate parameter).
1725 if (NegateOpsAndResult && !Negate)
1726 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1727 return CmpL;
1728}
1729
1730/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1731/// of CCMP/FCCMP ops. See @ref AArch64CCMP.
1732/// \see emitConjunctionDisjunctionTreeRec().
1733static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
1734 AArch64CC::CondCode &OutCC) {
1735 bool CanNegate;
1736 if (!isConjunctionDisjunctionTree(Val, CanNegate))
1737 return SDValue();
1738
1739 return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(),
1740 AArch64CC::AL);
1741}
1742
1743/// @}
1744
1745static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1746 SDValue &AArch64cc, SelectionDAG &DAG,
1747 const SDLoc &dl) {
1748 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1749 EVT VT = RHS.getValueType();
1750 uint64_t C = RHSC->getZExtValue();
1751 if (!isLegalArithImmed(C)) {
1752 // Constant does not fit, try adjusting it by one?
1753 switch (CC) {
1754 default:
1755 break;
1756 case ISD::SETLT:
1757 case ISD::SETGE:
1758 if ((VT == MVT::i32 && C != 0x80000000 &&
1759 isLegalArithImmed((uint32_t)(C - 1))) ||
1760 (VT == MVT::i64 && C != 0x80000000ULL &&
1761 isLegalArithImmed(C - 1ULL))) {
1762 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1763 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1764 RHS = DAG.getConstant(C, dl, VT);
1765 }
1766 break;
1767 case ISD::SETULT:
1768 case ISD::SETUGE:
1769 if ((VT == MVT::i32 && C != 0 &&
1770 isLegalArithImmed((uint32_t)(C - 1))) ||
1771 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1772 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1773 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1774 RHS = DAG.getConstant(C, dl, VT);
1775 }
1776 break;
1777 case ISD::SETLE:
1778 case ISD::SETGT:
1779 if ((VT == MVT::i32 && C != INT32_MAX &&
1780 isLegalArithImmed((uint32_t)(C + 1))) ||
1781 (VT == MVT::i64 && C != INT64_MAX &&
1782 isLegalArithImmed(C + 1ULL))) {
1783 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1784 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1785 RHS = DAG.getConstant(C, dl, VT);
1786 }
1787 break;
1788 case ISD::SETULE:
1789 case ISD::SETUGT:
1790 if ((VT == MVT::i32 && C != UINT32_MAX &&
1791 isLegalArithImmed((uint32_t)(C + 1))) ||
1792 (VT == MVT::i64 && C != UINT64_MAX &&
1793 isLegalArithImmed(C + 1ULL))) {
1794 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1795 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1796 RHS = DAG.getConstant(C, dl, VT);
1797 }
1798 break;
1799 }
1800 }
1801 }
1802 SDValue Cmp;
1803 AArch64CC::CondCode AArch64CC;
1804 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1805 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1806
1807 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1808 // For the i8 operand, the largest immediate is 255, so this can be easily
1809 // encoded in the compare instruction. For the i16 operand, however, the
1810 // largest immediate cannot be encoded in the compare.
1811 // Therefore, use a sign extending load and cmn to avoid materializing the
1812 // -1 constant. For example,
1813 // movz w1, #65535
1814 // ldrh w0, [x0, #0]
1815 // cmp w0, w1
1816 // >
1817 // ldrsh w0, [x0, #0]
1818 // cmn w0, #1
1819 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1820 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1821 // ensure both the LHS and RHS are truly zero extended and to make sure the
1822 // transformation is profitable.
1823 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1824 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1825 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1826 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1827 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1828 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1829 SDValue SExt =
1830 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1831 DAG.getValueType(MVT::i16));
1832 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1833 RHS.getValueType()),
1834 CC, dl, DAG);
1835 AArch64CC = changeIntCCToAArch64CC(CC);
1836 }
1837 }
1838
1839 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1840 if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1841 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1842 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1843 }
1844 }
1845 }
1846
1847 if (!Cmp) {
1848 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1849 AArch64CC = changeIntCCToAArch64CC(CC);
1850 }
1851 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1852 return Cmp;
1853}
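// Example of the immediate adjustment above: "x < 4097" uses an illegal
// immediate (4097), so SETLT is rewritten to SETLE against 4096, which is a
// legal arithmetic immediate (1 << 12), and a single compare suffices.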
1854
1855static std::pair<SDValue, SDValue>
1856getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1857 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1858        "Unsupported value type");
1859 SDValue Value, Overflow;
1860 SDLoc DL(Op);
1861 SDValue LHS = Op.getOperand(0);
1862 SDValue RHS = Op.getOperand(1);
1863 unsigned Opc = 0;
1864 switch (Op.getOpcode()) {
1865 default:
1866 llvm_unreachable("Unknown overflow instruction!");
1867 case ISD::SADDO:
1868 Opc = AArch64ISD::ADDS;
1869 CC = AArch64CC::VS;
1870 break;
1871 case ISD::UADDO:
1872 Opc = AArch64ISD::ADDS;
1873 CC = AArch64CC::HS;
1874 break;
1875 case ISD::SSUBO:
1876 Opc = AArch64ISD::SUBS;
1877 CC = AArch64CC::VS;
1878 break;
1879 case ISD::USUBO:
1880 Opc = AArch64ISD::SUBS;
1881 CC = AArch64CC::LO;
1882 break;
1883 // Multiply needs a little bit of extra work.
1884 case ISD::SMULO:
1885 case ISD::UMULO: {
1886 CC = AArch64CC::NE;
1887 bool IsSigned = Op.getOpcode() == ISD::SMULO;
1888 if (Op.getValueType() == MVT::i32) {
1889 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1890 // For a 32 bit multiply with overflow check we want the instruction
1891 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1892 // need to generate the following pattern:
1893 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1894 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1895 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1896 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1897 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1898 DAG.getConstant(0, DL, MVT::i64));
1899 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1900 // operation. We need to clear out the upper 32 bits, because we used a
1901 // widening multiply that wrote all 64 bits. In the end this should be a
1902 // noop.
1903 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1904 if (IsSigned) {
1905 // The signed overflow check requires more than just a simple check for
1906 // any bit set in the upper 32 bits of the result. These bits could be
1907 // just the sign bits of a negative number. To perform the overflow
1908 // check we arithmetically shift the lower 32 bits of the result right by
1909 // 31 bits and compare them with the upper 32 bits.
1910 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1911 DAG.getConstant(32, DL, MVT::i64));
1912 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1913 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1914 DAG.getConstant(31, DL, MVT::i64));
1915 // It is important that LowerBits is last, otherwise the arithmetic
1916 // shift will not be folded into the compare (SUBS).
1917 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1918 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1919 .getValue(1);
1920 } else {
1921 // The overflow check for unsigned multiply is easy. We only need to
1922 // check if any of the upper 32 bits are set. This can be done with a
1923 // CMP (shifted register). For that we need to generate the following
1924 // pattern:
1925 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
1926 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1927 DAG.getConstant(32, DL, MVT::i64));
1928 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1929 Overflow =
1930 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1931 DAG.getConstant(0, DL, MVT::i64),
1932 UpperBits).getValue(1);
1933 }
1934 break;
1935 }
1936 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1937 // For the 64 bit multiply
1938 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1939 if (IsSigned) {
1940 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1941 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1942 DAG.getConstant(63, DL, MVT::i64));
1943 // It is important that LowerBits is last, otherwise the arithmetic
1944 // shift will not be folded into the compare (SUBS).
1945 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1946 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1947 .getValue(1);
1948 } else {
1949 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1950 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1951 Overflow =
1952 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1953 DAG.getConstant(0, DL, MVT::i64),
1954 UpperBits).getValue(1);
1955 }
1956 break;
1957 }
1958 } // switch (...)
1959
1960 if (Opc) {
1961 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1962
1963 // Emit the AArch64 operation with overflow check.
1964 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1965 Overflow = Value.getValue(1);
1966 }
1967 return std::make_pair(Value, Overflow);
1968}
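// Numeric illustration of the signed i32 case: for %a = %b = 0x10000 the
// widening multiply produces 0x100000000, the truncated result is 0, the
// upper 32 bits are 1 while (0 >> 31) is 0, so the SUBS compares unequal and
// the NE condition reports overflow.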
1969
1970SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1971 RTLIB::Libcall Call) const {
1972 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1973 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
1974}
1975
1976// Returns true if the given Op is the overflow flag result of an overflow
1977// intrinsic operation.
1978static bool isOverflowIntrOpRes(SDValue Op) {
1979 unsigned Opc = Op.getOpcode();
1980 return (Op.getResNo() == 1 &&
1981 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
1982 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
1983}
1984
1985static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
1986 SDValue Sel = Op.getOperand(0);
1987 SDValue Other = Op.getOperand(1);
1988 SDLoc dl(Sel);
1989
1990 // If the operand is an overflow checking operation, invert the condition
1991 // code and kill the Not operation. I.e., transform:
1992 // (xor (overflow_op_bool, 1))
1993 // -->
1994 // (csel 1, 0, invert(cc), overflow_op_bool)
1995 // ... which later gets transformed to just a cset instruction with an
1996 // inverted condition code, rather than a cset + eor sequence.
1997 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
1998 // Only lower legal XALUO ops.
1999 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2000 return SDValue();
2001
2002 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2003 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2004 AArch64CC::CondCode CC;
2005 SDValue Value, Overflow;
2006 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2007 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2008 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2009 CCVal, Overflow);
2010 }
2011 // If neither operand is a SELECT_CC, give up.
2012 if (Sel.getOpcode() != ISD::SELECT_CC)
2013 std::swap(Sel, Other);
2014 if (Sel.getOpcode() != ISD::SELECT_CC)
2015 return Op;
2016
2017 // The folding we want to perform is:
2018 // (xor x, (select_cc a, b, cc, 0, -1) )
2019 // -->
2020 // (csel x, (xor x, -1), cc ...)
2021 //
2022 // The latter will get matched to a CSINV instruction.
2023
2024 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2025 SDValue LHS = Sel.getOperand(0);
2026 SDValue RHS = Sel.getOperand(1);
2027 SDValue TVal = Sel.getOperand(2);
2028 SDValue FVal = Sel.getOperand(3);
2029
2030 // FIXME: This could be generalized to non-integer comparisons.
2031 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2032 return Op;
2033
2034 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2035 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2036
2037 // The values aren't constants, this isn't the pattern we're looking for.
2038 if (!CFVal || !CTVal)
2039 return Op;
2040
2041 // We can commute the SELECT_CC by inverting the condition. This
2042 // might be needed to make this fit into a CSINV pattern.
2043 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2044 std::swap(TVal, FVal);
2045 std::swap(CTVal, CFVal);
2046 CC = ISD::getSetCCInverse(CC, true);
2047 }
2048
2049 // If the constants line up, perform the transform!
2050 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2051 SDValue CCVal;
2052 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2053
2054 FVal = Other;
2055 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2056 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2057
2058 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2059 CCVal, Cmp);
2060 }
2061
2062 return Op;
2063}
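// Concrete trace of the fold: "(xor a, (select_cc x, y, setlt, 0, -1))"
// computes a when x < y and ~a otherwise; the CSEL built above expresses
// exactly that and is later matched to a single CSINV.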
2064
2065static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2066 EVT VT = Op.getValueType();
2067
2068 // Let legalize expand this if it isn't a legal type yet.
2069 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2070 return SDValue();
2071
2072 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2073
2074 unsigned Opc;
2075 bool ExtraOp = false;
2076 switch (Op.getOpcode()) {
2077 default:
2078 llvm_unreachable("Invalid code");
2079 case ISD::ADDC:
2080 Opc = AArch64ISD::ADDS;
2081 break;
2082 case ISD::SUBC:
2083 Opc = AArch64ISD::SUBS;
2084 break;
2085 case ISD::ADDE:
2086 Opc = AArch64ISD::ADCS;
2087 ExtraOp = true;
2088 break;
2089 case ISD::SUBE:
2090 Opc = AArch64ISD::SBCS;
2091 ExtraOp = true;
2092 break;
2093 }
2094
2095 if (!ExtraOp)
2096 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2097 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2098 Op.getOperand(2));
2099}
2100
2101static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2102 // Let legalize expand this if it isn't a legal type yet.
2103 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2104 return SDValue();
2105
2106 SDLoc dl(Op);
2107 AArch64CC::CondCode CC;
2108 // The actual operation that sets the overflow or carry flag.
2109 SDValue Value, Overflow;
2110 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2111
2112 // We use 0 and 1 as false and true values.
2113 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2114 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2115
2116 // We use an inverted condition, because the conditional select is inverted
2117 // too. This will allow it to be selected to a single instruction:
2118 // CSINC Wd, WZR, WZR, invert(cond).
2119 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2120 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2121 CCVal, Overflow);
2122
2123 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2124 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2125}
2126
2127// Prefetch operands are:
2128// 1: Address to prefetch
2129// 2: bool isWrite
2130// 3: int locality (0 = no locality ... 3 = extreme locality)
2131// 4: bool isDataCache
2132static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2133 SDLoc DL(Op);
2134 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2135 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2136 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2137
2138 bool IsStream = !Locality;
2139 // When the locality number is set
2140 if (Locality) {
2141 // The front-end should have filtered out the out-of-range values
2142 assert(Locality <= 3 && "Prefetch locality out-of-range");
2143 // The locality degree is the opposite of the cache speed.
2144 // Put the number the other way around.
2145 // The encoding starts at 0 for level 1
2146 Locality = 3 - Locality;
2147 }
2148
2149 // Build the mask value encoding the expected behavior.
2150 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2151 (!IsData << 3) | // IsDataCache bit
2152 (Locality << 1) | // Cache level bits
2153 (unsigned)IsStream; // Stream bit
2154 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2155 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2156}
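// Sample encodings from the formula above: a data prefetch for write with
// locality 3 gives IsWrite=1, !IsData=0, Locality=3-3=0, IsStream=0, i.e.
// PrfOp = 0b10000 (PSTL1KEEP); a data read with locality 0 gives
// PrfOp = 0b00001 (PLDL1STRM).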
2157
2158SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2159 SelectionDAG &DAG) const {
2160 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2161
2162 RTLIB::Libcall LC;
2163 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2164
2165 return LowerF128Call(Op, DAG, LC);
2166}
2167
2168SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2169 SelectionDAG &DAG) const {
2170 if (Op.getOperand(0).getValueType() != MVT::f128) {
2171 // It's legal except when f128 is involved
2172 return Op;
2173 }
2174
2175 RTLIB::Libcall LC;
2176 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2177
2178 // FP_ROUND node has a second operand indicating whether it is known to be
2179 // precise. That doesn't take part in the LibCall so we can't directly use
2180 // LowerF128Call.
2181 SDValue SrcVal = Op.getOperand(0);
2182 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2183 SDLoc(Op)).first;
2184}
2185
2186static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2187 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2188 // Any additional optimization in this function should be recorded
2189 // in the cost tables.
2190 EVT InVT = Op.getOperand(0).getValueType();
2191 EVT VT = Op.getValueType();
2192 unsigned NumElts = InVT.getVectorNumElements();
2193
2194 // f16 vectors are promoted to f32 before a conversion.
2195 if (InVT.getVectorElementType() == MVT::f16) {
2196 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2197 SDLoc dl(Op);
2198 return DAG.getNode(
2199 Op.getOpcode(), dl, Op.getValueType(),
2200 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2201 }
2202
2203 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2204 SDLoc dl(Op);
2205 SDValue Cv =
2206 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2207 Op.getOperand(0));
2208 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2209 }
2210
2211 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2212 SDLoc dl(Op);
2213 MVT ExtVT =
2214 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2215 VT.getVectorNumElements());
2216 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2217 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2218 }
2219
2220 // Type changing conversions are illegal.
2221 return Op;
2222}
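// Putting the cases above together: an fptosi from v4f16 to v4i16 is first
// rewritten with a v4f32 operand; when that node is legalized again, the
// narrowing path converts to v4i32 before truncating back down to v4i16.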
2223
2224SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2225 SelectionDAG &DAG) const {
2226 if (Op.getOperand(0).getValueType().isVector())
2227 return LowerVectorFP_TO_INT(Op, DAG);
2228
2229 // f16 conversions are promoted to f32 when full fp16 is not supported.
2230 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2231 !Subtarget->hasFullFP16()) {
2232 SDLoc dl(Op);
2233 return DAG.getNode(
2234 Op.getOpcode(), dl, Op.getValueType(),
2235 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2236 }
2237
2238 if (Op.getOperand(0).getValueType() != MVT::f128) {
2239 // It's legal except when f128 is involved
2240 return Op;
2241 }
2242
2243 RTLIB::Libcall LC;
2244 if (Op.getOpcode() == ISD::FP_TO_SINT)
2245 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2246 else
2247 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2248
2249 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2250 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2251}
2252
2253static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2254 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2255 // Any additional optimization in this function should be recorded
2256 // in the cost tables.
2257 EVT VT = Op.getValueType();
2258 SDLoc dl(Op);
2259 SDValue In = Op.getOperand(0);
2260 EVT InVT = In.getValueType();
2261
2262 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2263 MVT CastVT =
2264 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2265 InVT.getVectorNumElements());
2266 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2267 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2268 }
2269
2270 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2271 unsigned CastOpc =
2272 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2273 EVT CastVT = VT.changeVectorElementTypeToInteger();
2274 In = DAG.getNode(CastOpc, dl, CastVT, In);
2275 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2276 }
2277
2278 return Op;
2279}
2280
2281SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2282 SelectionDAG &DAG) const {
2283 if (Op.getValueType().isVector())
2284 return LowerVectorINT_TO_FP(Op, DAG);
2285
2286 // f16 conversions are promoted to f32 when full fp16 is not supported.
2287 if (Op.getValueType() == MVT::f16 &&
2288 !Subtarget->hasFullFP16()) {
2289 SDLoc dl(Op);
2290 return DAG.getNode(
2291 ISD::FP_ROUND, dl, MVT::f16,
2292 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2293 DAG.getIntPtrConstant(0, dl));
2294 }
2295
2296 // i128 conversions are libcalls.
2297 if (Op.getOperand(0).getValueType() == MVT::i128)
2298 return SDValue();
2299
2300 // Other conversions are legal, unless it's to the completely software-based
2301 // fp128.
2302 if (Op.getValueType() != MVT::f128)
2303 return Op;
2304
2305 RTLIB::Libcall LC;
2306 if (Op.getOpcode() == ISD::SINT_TO_FP)
2307 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2308 else
2309 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2310
2311 return LowerF128Call(Op, DAG, LC);
2312}
2313
2314SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2315 SelectionDAG &DAG) const {
2316 // For iOS, we want to call an alternative entry point: __sincos_stret,
2317 // which returns the values in two S / D registers.
2318 SDLoc dl(Op);
2319 SDValue Arg = Op.getOperand(0);
2320 EVT ArgVT = Arg.getValueType();
2321 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2322
2323 ArgListTy Args;
2324 ArgListEntry Entry;
2325
2326 Entry.Node = Arg;
2327 Entry.Ty = ArgTy;
2328 Entry.IsSExt = false;
2329 Entry.IsZExt = false;
2330 Args.push_back(Entry);
2331
2332 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2333 : RTLIB::SINCOS_STRET_F32;
2334 const char *LibcallName = getLibcallName(LC);
2335 SDValue Callee =
2336 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2337
2338 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2339 TargetLowering::CallLoweringInfo CLI(DAG);
2340 CLI.setDebugLoc(dl)
2341 .setChain(DAG.getEntryNode())
2342 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2343
2344 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2345 return CallResult.first;
2346}
2347
2348static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2349 if (Op.getValueType() != MVT::f16)
2350 return SDValue();
2351
2352 assert(Op.getOperand(0).getValueType() == MVT::i16);
2353 SDLoc DL(Op);
2354
2355 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2356 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2357 return SDValue(
2358 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2359 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2360 0);
2361}
2362
2363static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2364 if (OrigVT.getSizeInBits() >= 64)
2365 return OrigVT;
2366
2367 assert(OrigVT.isSimple() && "Expecting a simple value type");
2368
2369 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2370 switch (OrigSimpleTy) {
2371 default: llvm_unreachable("Unexpected Vector Type");
2372 case MVT::v2i8:
2373 case MVT::v2i16:
2374 return MVT::v2i32;
2375 case MVT::v4i8:
2376 return MVT::v4i16;
2377 }
2378}
2379
2380static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2381 const EVT &OrigTy,
2382 const EVT &ExtTy,
2383 unsigned ExtOpcode) {
2384 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2385 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2386 // 64-bits we need to insert a new extension so that it will be 64-bits.
2387 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2388 if (OrigTy.getSizeInBits() >= 64)
2389 return N;
2390
2391 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2392 EVT NewVT = getExtensionTo64Bits(OrigTy);
2393
2394 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2395}
2396
2397static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2398 bool isSigned) {
2399 EVT VT = N->getValueType(0);
2400
2401 if (N->getOpcode() != ISD::BUILD_VECTOR)
2402 return false;
2403
2404 for (const SDValue &Elt : N->op_values()) {
2405 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2406 unsigned EltSize = VT.getScalarSizeInBits();
2407 unsigned HalfSize = EltSize / 2;
2408 if (isSigned) {
2409 if (!isIntN(HalfSize, C->getSExtValue()))
2410 return false;
2411 } else {
2412 if (!isUIntN(HalfSize, C->getZExtValue()))
2413 return false;
2414 }
2415 continue;
2416 }
2417 return false;
2418 }
2419
2420 return true;
2421}
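// For instance, a v4i16 BUILD_VECTOR whose constants all fit in 8 signed bits
// (e.g. <100, -3, 7, 0>) is treated as an implicitly sign-extended operand,
// which lets LowerMUL below still form an SMULL.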
2422
2423static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2424 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2425 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2426 N->getOperand(0)->getValueType(0),
2427 N->getValueType(0),
2428 N->getOpcode());
2429
2430 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2431 EVT VT = N->getValueType(0);
2432 SDLoc dl(N);
2433 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2434 unsigned NumElts = VT.getVectorNumElements();
2435 MVT TruncVT = MVT::getIntegerVT(EltSize);
2436 SmallVector<SDValue, 8> Ops;
2437 for (unsigned i = 0; i != NumElts; ++i) {
2438 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2439 const APInt &CInt = C->getAPIntValue();
2440 // Element types smaller than 32 bits are not legal, so use i32 elements.
2441 // The values are implicitly truncated so sext vs. zext doesn't matter.
2442 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2443 }
2444 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2445}
2446
2447static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2448 return N->getOpcode() == ISD::SIGN_EXTEND ||
2449 isExtendedBUILD_VECTOR(N, DAG, true);
2450}
2451
2452static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2453 return N->getOpcode() == ISD::ZERO_EXTEND ||
2454 isExtendedBUILD_VECTOR(N, DAG, false);
2455}
2456
2457static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2458 unsigned Opcode = N->getOpcode();
2459 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2460 SDNode *N0 = N->getOperand(0).getNode();
2461 SDNode *N1 = N->getOperand(1).getNode();
2462 return N0->hasOneUse() && N1->hasOneUse() &&
2463 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2464 }
2465 return false;
2466}
2467
2468static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2469 unsigned Opcode = N->getOpcode();
2470 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2471 SDNode *N0 = N->getOperand(0).getNode();
2472 SDNode *N1 = N->getOperand(1).getNode();
2473 return N0->hasOneUse() && N1->hasOneUse() &&
2474 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2475 }
2476 return false;
2477}
2478
2479static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2480 // Multiplications are only custom-lowered for 128-bit vectors so that
2481 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2482 EVT VT = Op.getValueType();
2483   assert(VT.is128BitVector() && VT.isInteger() &&
2484          "unexpected type for custom-lowering ISD::MUL");
2485 SDNode *N0 = Op.getOperand(0).getNode();
2486 SDNode *N1 = Op.getOperand(1).getNode();
2487 unsigned NewOpc = 0;
2488 bool isMLA = false;
2489 bool isN0SExt = isSignExtended(N0, DAG);
2490 bool isN1SExt = isSignExtended(N1, DAG);
2491 if (isN0SExt && isN1SExt)
2492 NewOpc = AArch64ISD::SMULL;
2493 else {
2494 bool isN0ZExt = isZeroExtended(N0, DAG);
2495 bool isN1ZExt = isZeroExtended(N1, DAG);
2496 if (isN0ZExt && isN1ZExt)
2497 NewOpc = AArch64ISD::UMULL;
2498 else if (isN1SExt || isN1ZExt) {
2499 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2500 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2501 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2502 NewOpc = AArch64ISD::SMULL;
2503 isMLA = true;
2504 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2505 NewOpc = AArch64ISD::UMULL;
2506 isMLA = true;
2507 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2508 std::swap(N0, N1);
2509 NewOpc = AArch64ISD::UMULL;
2510 isMLA = true;
2511 }
2512 }
2513
2514 if (!NewOpc) {
2515 if (VT == MVT::v2i64)
2516 // Fall through to expand this. It is not legal.
2517 return SDValue();
2518 else
2519 // Other vector multiplications are legal.
2520 return Op;
2521 }
2522 }
2523
2524 // Legalize to a S/UMULL instruction
2525 SDLoc DL(Op);
2526 SDValue Op0;
2527 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2528 if (!isMLA) {
2529 Op0 = skipExtensionForVectorMULL(N0, DAG);
2530     assert(Op0.getValueType().is64BitVector() &&
2531            Op1.getValueType().is64BitVector() &&
2532            "unexpected types for extended operands to VMULL");
2533 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2534 }
2535   // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2536   // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
2537   // This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
2538 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2539 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2540 EVT Op1VT = Op1.getValueType();
2541 return DAG.getNode(N0->getOpcode(), DL, VT,
2542 DAG.getNode(NewOpc, DL, VT,
2543 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2544 DAG.getNode(NewOpc, DL, VT,
2545 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2546}
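// Concrete examples of the patterns matched above: (mul (sext v8i8:A to v8i16),
// (sext v8i8:B to v8i16)) becomes AArch64ISD::SMULL(A, B), and in the MLA case
// (mul (add (zext A), (zext B)), (zext C)) becomes
// (add (UMULL A, C), (UMULL B, C)) so each multiply can feed the accumulate.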
2547
2548SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2549 SelectionDAG &DAG) const {
2550 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2551 SDLoc dl(Op);
2552 switch (IntNo) {
2553 default: return SDValue(); // Don't custom lower most intrinsics.
2554 case Intrinsic::thread_pointer: {
2555 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2556 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2557 }
2558 case Intrinsic::aarch64_neon_abs:
2559 return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
2560 Op.getOperand(1));
2561 case Intrinsic::aarch64_neon_smax:
2562 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2563 Op.getOperand(1), Op.getOperand(2));
2564 case Intrinsic::aarch64_neon_umax:
2565 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2566 Op.getOperand(1), Op.getOperand(2));
2567 case Intrinsic::aarch64_neon_smin:
2568 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2569 Op.getOperand(1), Op.getOperand(2));
2570 case Intrinsic::aarch64_neon_umin:
2571 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2572 Op.getOperand(1), Op.getOperand(2));
2573 }
2574}
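// For example, a call to llvm.aarch64.neon.smax on v4i32 operands is rewritten
// here to the generic ISD::SMAX node, so the rest of the pipeline treats it
// exactly like a target-independent signed maximum.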
2575
2576SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2577 SelectionDAG &DAG) const {
2578   DEBUG(dbgs() << "Custom lowering: ");
2579   DEBUG(Op.dump());
2580
2581 switch (Op.getOpcode()) {
2582 default:
2583     llvm_unreachable("unimplemented operand");
2584 return SDValue();
2585 case ISD::BITCAST:
2586 return LowerBITCAST(Op, DAG);
2587 case ISD::GlobalAddress:
2588 return LowerGlobalAddress(Op, DAG);
2589 case ISD::GlobalTLSAddress:
2590 return LowerGlobalTLSAddress(Op, DAG);
2591 case ISD::SETCC:
2592 return LowerSETCC(Op, DAG);
2593 case ISD::BR_CC:
2594 return LowerBR_CC(Op, DAG);
2595 case ISD::SELECT:
2596 return LowerSELECT(Op, DAG);
2597 case ISD::SELECT_CC:
2598 return LowerSELECT_CC(Op, DAG);
2599 case ISD::JumpTable:
2600 return LowerJumpTable(Op, DAG);
2601 case ISD::ConstantPool:
2602 return LowerConstantPool(Op, DAG);
2603 case ISD::BlockAddress:
2604 return LowerBlockAddress(Op, DAG);
2605 case ISD::VASTART:
2606 return LowerVASTART(Op, DAG);
2607 case ISD::VACOPY:
2608 return LowerVACOPY(Op, DAG);
2609 case ISD::VAARG:
2610 return LowerVAARG(Op, DAG);
2611 case ISD::ADDC:
2612 case ISD::ADDE:
2613 case ISD::SUBC:
2614 case ISD::SUBE:
2615 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2616 case ISD::SADDO:
2617 case ISD::UADDO:
2618 case ISD::SSUBO:
2619 case ISD::USUBO:
2620 case ISD::SMULO:
2621 case ISD::UMULO:
2622 return LowerXALUO(Op, DAG);
2623 case ISD::FADD:
2624 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2625 case ISD::FSUB:
2626 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2627 case ISD::FMUL:
2628 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2629 case ISD::FDIV:
2630 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2631 case ISD::FP_ROUND:
2632 return LowerFP_ROUND(Op, DAG);
2633 case ISD::FP_EXTEND:
2634 return LowerFP_EXTEND(Op, DAG);
2635 case ISD::FRAMEADDR:
2636 return LowerFRAMEADDR(Op, DAG);
2637 case ISD::RETURNADDR:
2638 return LowerRETURNADDR(Op, DAG);
2639 case ISD::INSERT_VECTOR_ELT:
2640 return LowerINSERT_VECTOR_ELT(Op, DAG);
2641 case ISD::EXTRACT_VECTOR_ELT:
2642 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2643 case ISD::BUILD_VECTOR:
2644 return LowerBUILD_VECTOR(Op, DAG);
2645 case ISD::VECTOR_SHUFFLE:
2646 return LowerVECTOR_SHUFFLE(Op, DAG);
2647 case ISD::EXTRACT_SUBVECTOR:
2648 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2649 case ISD::SRA:
2650 case ISD::SRL:
2651 case ISD::SHL:
2652 return LowerVectorSRA_SRL_SHL(Op, DAG);
2653 case ISD::SHL_PARTS:
2654 return LowerShiftLeftParts(Op, DAG);
2655 case ISD::SRL_PARTS:
2656 case ISD::SRA_PARTS:
2657 return LowerShiftRightParts(Op, DAG);
2658 case ISD::CTPOP:
2659 return LowerCTPOP(Op, DAG);
2660 case ISD::FCOPYSIGN:
2661 return LowerFCOPYSIGN(Op, DAG);
2662 case ISD::AND:
2663 return LowerVectorAND(Op, DAG);
2664 case ISD::OR:
2665 return LowerVectorOR(Op, DAG);
2666 case ISD::XOR:
2667 return LowerXOR(Op, DAG);
2668 case ISD::PREFETCH:
2669 return LowerPREFETCH(Op, DAG);
2670 case ISD::SINT_TO_FP:
2671 case ISD::UINT_TO_FP:
2672 return LowerINT_TO_FP(Op, DAG);
2673 case ISD::FP_TO_SINT:
2674 case ISD::FP_TO_UINT:
2675 return LowerFP_TO_INT(Op, DAG);
2676 case ISD::FSINCOS:
2677 return LowerFSINCOS(Op, DAG);
2678 case ISD::MUL:
2679 return LowerMUL(Op, DAG);
2680 case ISD::INTRINSIC_WO_CHAIN:
2681 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2682 case ISD::VECREDUCE_ADD:
2683 case ISD::VECREDUCE_SMAX:
2684 case ISD::VECREDUCE_SMIN:
2685 case ISD::VECREDUCE_UMAX:
2686 case ISD::VECREDUCE_UMIN:
2687 case ISD::VECREDUCE_FMAX:
2688 case ISD::VECREDUCE_FMIN:
2689 return LowerVECREDUCE(Op, DAG);
2690 case ISD::ATOMIC_LOAD_SUB:
2691 return LowerATOMIC_LOAD_SUB(Op, DAG);
2692 case ISD::ATOMIC_LOAD_AND:
2693 return LowerATOMIC_LOAD_AND(Op, DAG);
2694 case ISD::DYNAMIC_STACKALLOC:
2695 return LowerDYNAMIC_STACKALLOC(Op, DAG);
2696 }
2697}
2698
2699//===----------------------------------------------------------------------===//
2700// Calling Convention Implementation
2701//===----------------------------------------------------------------------===//
2702
2703#include "AArch64GenCallingConv.inc"
2704
2705/// Selects the correct CCAssignFn for a given CallingConvention value.
2706CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2707 bool IsVarArg) const {
2708 switch (CC) {
2709 default:
2710 report_fatal_error("Unsupported calling convention.");
2711 case CallingConv::WebKit_JS:
2712 return CC_AArch64_WebKit_JS;
2713 case CallingConv::GHC:
2714 return CC_AArch64_GHC;
2715 case CallingConv::C:
2716 case CallingConv::Fast:
2717 case CallingConv::PreserveMost:
2718 case CallingConv::CXX_FAST_TLS:
2719 case CallingConv::Swift:
2720 if (Subtarget->isTargetWindows() && IsVarArg)
2721 return CC_AArch64_Win64_VarArg;
2722 if (!Subtarget->isTargetDarwin())
2723 return CC_AArch64_AAPCS;
2724 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2725 case CallingConv::Win64:
2726 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
2727 }
2728}
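// Example: a variadic C call compiled for a Windows/AArch64 target uses
// CC_AArch64_Win64_VarArg, the same call on a generic AAPCS target (e.g.
// Linux) uses CC_AArch64_AAPCS, and on Darwin it uses CC_AArch64_DarwinPCS_VarArg.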
2729
2730CCAssignFn *
2731AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2732 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2733 : RetCC_AArch64_AAPCS;
2734}
2735
2736SDValue AArch64TargetLowering::LowerFormalArguments(
2737 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2738 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2739 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2740 MachineFunction &MF = DAG.getMachineFunction();
2741 MachineFrameInfo &MFI = MF.getFrameInfo();
2742 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
2743
2744 // Assign locations to all of the incoming arguments.
2745 SmallVector<CCValAssign, 16> ArgLocs;
2746 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2747 *DAG.getContext());
2748
2749 // At this point, Ins[].VT may already be promoted to i32. To correctly
2750 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2751 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2752 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2753 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2754 // LocVT.
2755 unsigned NumArgs = Ins.size();
2756 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
2757 unsigned CurArgIdx = 0;
2758 for (unsigned i = 0; i != NumArgs; ++i) {
2759 MVT ValVT = Ins[i].VT;
2760 if (Ins[i].isOrigArg()) {
2761 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2762 CurArgIdx = Ins[i].getOrigArgIndex();
2763
2764 // Get type of the original argument.
2765 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2766 /*AllowUnknown*/ true);
2767 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2768 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2769 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2770 ValVT = MVT::i8;
2771 else if (ActualMVT == MVT::i16)
2772 ValVT = MVT::i16;
2773 }
2774 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2775 bool Res =
2776 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2777     assert(!Res && "Call operand has unhandled type");
2778 (void)Res;
2779 }
2780   assert(ArgLocs.size() == Ins.size());
2781 SmallVector<SDValue, 16> ArgValues;
2782 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2783 CCValAssign &VA = ArgLocs[i];
2784
2785 if (Ins[i].Flags.isByVal()) {
2786 // Byval is used for HFAs in the PCS, but the system should work in a
2787 // non-compliant manner for larger structs.
2788 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2789 int Size = Ins[i].Flags.getByValSize();
2790 unsigned NumRegs = (Size + 7) / 8;
2791
2792 // FIXME: This works on big-endian for composite byvals, which are the common
2793 // case. It should also work for fundamental types too.
2794 unsigned FrameIdx =
2795 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2796 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2797 InVals.push_back(FrameIdxN);
2798
2799 continue;
2800 }
2801
2802 if (VA.isRegLoc()) {
2803 // Arguments stored in registers.
2804 EVT RegVT = VA.getLocVT();
2805
2806 SDValue ArgValue;
2807 const TargetRegisterClass *RC;
2808
2809 if (RegVT == MVT::i32)
2810 RC = &AArch64::GPR32RegClass;
2811 else if (RegVT == MVT::i64)
2812 RC = &AArch64::GPR64RegClass;
2813 else if (RegVT == MVT::f16)
2814 RC = &AArch64::FPR16RegClass;
2815 else if (RegVT == MVT::f32)
2816 RC = &AArch64::FPR32RegClass;
2817 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2818 RC = &AArch64::FPR64RegClass;
2819 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2820 RC = &AArch64::FPR128RegClass;
2821 else
2822         llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2823
2824 // Transform the arguments in physical registers into virtual ones.
2825 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2826 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2827
2828 // If this is an 8, 16 or 32-bit value, it is really passed promoted
2829 // to 64 bits. Insert an assert[sz]ext to capture this, then
2830 // truncate to the right size.
2831 switch (VA.getLocInfo()) {
2832 default:
2833         llvm_unreachable("Unknown loc info!");
2834 case CCValAssign::Full:
2835 break;
2836 case CCValAssign::BCvt:
2837 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
2838 break;
2839 case CCValAssign::AExt:
2840 case CCValAssign::SExt:
2841 case CCValAssign::ZExt:
2842 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
2843 // nodes after our lowering.
2844         assert(RegVT == Ins[i].VT && "incorrect register location selected");
2845 break;
2846 }
2847
2848 InVals.push_back(ArgValue);
2849
2850 } else { // VA.isRegLoc()
2851       assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
2852 unsigned ArgOffset = VA.getLocMemOffset();
2853 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
2854
2855 uint32_t BEAlign = 0;
2856 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
2857 !Ins[i].Flags.isInConsecutiveRegs())
2858 BEAlign = 8 - ArgSize;
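      // For example, an i32 stack argument on a big-endian target has
      // ArgSize == 4, so BEAlign == 4 and the fixed object below is created at
      // ArgOffset + 4, i.e. the load reads the high-addressed half of the
      // 8-byte slot.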
2859
2860 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
2861
2862 // Create load nodes to retrieve arguments from the stack.
2863 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2864 SDValue ArgValue;
2865
2866       // For NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
2867 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
2868 MVT MemVT = VA.getValVT();
2869
2870 switch (VA.getLocInfo()) {
2871 default:
2872 break;
2873 case CCValAssign::BCvt:
2874 MemVT = VA.getLocVT();
2875 break;
2876 case CCValAssign::SExt:
2877 ExtType = ISD::SEXTLOAD;
2878 break;
2879 case CCValAssign::ZExt:
2880 ExtType = ISD::ZEXTLOAD;
2881 break;
2882 case CCValAssign::AExt:
2883 ExtType = ISD::EXTLOAD;
2884 break;
2885 }
2886
2887 ArgValue = DAG.getExtLoad(
2888 ExtType, DL, VA.getLocVT(), Chain, FIN,
2889 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
2890 MemVT);
2891
2892 InVals.push_back(ArgValue);
2893 }
2894 }
2895
2896 // varargs
2897 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2898 if (isVarArg) {
2899 if (!Subtarget->isTargetDarwin() || IsWin64) {
2900 // The AAPCS variadic function ABI is identical to the non-variadic
2901 // one. As a result there may be more arguments in registers and we should
2902 // save them for future reference.
2903 // Win64 variadic functions also pass arguments in registers, but all float
2904 // arguments are passed in integer registers.
2905 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
2906 }
2907
2908 // This will point to the next argument passed via stack.
2909 unsigned StackOffset = CCInfo.getNextStackOffset();
2910 // We currently pass all varargs at 8-byte alignment.
2911 StackOffset = ((StackOffset + 7) & ~7);
2912 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
2913 }
2914
2915 unsigned StackArgSize = CCInfo.getNextStackOffset();
2916 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2917 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
2918 // This is a non-standard ABI so by fiat I say we're allowed to make full
2919 // use of the stack area to be popped, which must be aligned to 16 bytes in
2920 // any case:
2921 StackArgSize = alignTo(StackArgSize, 16);
2922
2923 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
2924 // a multiple of 16.
2925 FuncInfo->setArgumentStackToRestore(StackArgSize);
2926
2927 // This realignment carries over to the available bytes below. Our own
2928 // callers will guarantee the space is free by giving an aligned value to
2929 // CALLSEQ_START.
2930 }
2931 // Even if we're not expected to free up the space, it's useful to know how
2932 // much is there while considering tail calls (because we can reuse it).
2933 FuncInfo->setBytesInStackArgArea(StackArgSize);
2934
2935 return Chain;
2936}
2937
2938void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
2939 SelectionDAG &DAG,
2940 const SDLoc &DL,
2941 SDValue &Chain) const {
2942 MachineFunction &MF = DAG.getMachineFunction();
2943 MachineFrameInfo &MFI = MF.getFrameInfo();
2944 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2945 auto PtrVT = getPointerTy(DAG.getDataLayout());
2946 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
2947
2948 SmallVector<SDValue, 8> MemOps;
2949
2950 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
2951 AArch64::X3, AArch64::X4, AArch64::X5,
2952 AArch64::X6, AArch64::X7 };
2953 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
2954 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
2955
2956 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
2957 int GPRIdx = 0;
2958 if (GPRSaveSize != 0) {
2959 if (IsWin64) {
2960 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
2961 if (GPRSaveSize & 15)
2962 // The extra size here, if triggered, will always be 8.
2963 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
2964 } else
2965 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
2966
2967 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
2968
2969 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
2970 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
2971 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
2972 SDValue Store = DAG.getStore(
2973 Val.getValue(1), DL, Val, FIN,
2974 IsWin64
2975 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
2976 GPRIdx,
2977 (i - FirstVariadicGPR) * 8)
2978 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
2979 MemOps.push_back(Store);
2980 FIN =
2981 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
2982 }
2983 }
2984 FuncInfo->setVarArgsGPRIndex(GPRIdx);
2985 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
2986
2987 if (Subtarget->hasFPARMv8() && !IsWin64) {
2988 static const MCPhysReg FPRArgRegs[] = {
2989 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
2990 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
2991 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
2992 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
2993
2994 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
2995 int FPRIdx = 0;
2996 if (FPRSaveSize != 0) {
2997 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
2998
2999 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3000
3001 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3002 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3003 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3004
3005 SDValue Store = DAG.getStore(
3006 Val.getValue(1), DL, Val, FIN,
3007 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3008 MemOps.push_back(Store);
3009 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3010 DAG.getConstant(16, DL, PtrVT));
3011 }
3012 }
3013 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3014 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3015 }
3016
3017 if (!MemOps.empty()) {
3018 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3019 }
3020}
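// Example: if the fixed arguments of a variadic function consume X0-X2, the
// remaining X3-X7 are spilled here (GPRSaveSize == 5 * 8 == 40 bytes), and when
// FP/SIMD registers are available (and the target is not Win64) the unused
// ones among Q0-Q7 are spilled to a separate 16-byte-per-register area.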
3021
3022/// LowerCallResult - Lower the result values of a call into the
3023/// appropriate copies out of appropriate physical registers.
3024SDValue AArch64TargetLowering::LowerCallResult(
3025 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3026 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3027 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3028 SDValue ThisVal) const {
3029 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3030 ? RetCC_AArch64_WebKit_JS
3031 : RetCC_AArch64_AAPCS;
3032 // Assign locations to each value returned by this call.
3033 SmallVector<CCValAssign, 16> RVLocs;
3034 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3035 *DAG.getContext());
3036 CCInfo.AnalyzeCallResult(Ins, RetCC);
3037
3038 // Copy all of the result registers out of their specified physreg.
3039 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3040 CCValAssign VA = RVLocs[i];
3041
3042 // Pass 'this' value directly from the argument to return value, to avoid
3043 // reg unit interference
3044 if (i == 0 && isThisReturn) {
3045       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3046              "unexpected return calling convention register assignment");
3047 InVals.push_back(ThisVal);
3048 continue;
3049 }
3050
3051 SDValue Val =
3052 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3053 Chain = Val.getValue(1);
3054 InFlag = Val.getValue(2);
3055
3056 switch (VA.getLocInfo()) {
3057 default:
3058       llvm_unreachable("Unknown loc info!");
3059 case CCValAssign::Full:
3060 break;
3061 case CCValAssign::BCvt:
3062 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3063 break;
3064 }
3065
3066 InVals.push_back(Val);
3067 }
3068
3069 return Chain;
3070}
3071
3072/// Return true if the calling convention is one that we can guarantee TCO for.
3073static bool canGuaranteeTCO(CallingConv::ID CC) {
3074 return CC == CallingConv::Fast;
3075}
3076
3077/// Return true if we might ever do TCO for calls with this calling convention.
3078static bool mayTailCallThisCC(CallingConv::ID CC) {
3079 switch (CC) {
3080 case CallingConv::C:
3081 case CallingConv::PreserveMost:
3082 case CallingConv::Swift:
3083 return true;
3084 default:
3085 return canGuaranteeTCO(CC);
3086 }
3087}
3088
3089bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3090 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3091 const SmallVectorImpl<ISD::OutputArg> &Outs,
3092 const SmallVectorImpl<SDValue> &OutVals,
3093 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3094 if (!mayTailCallThisCC(CalleeCC))
3095 return false;
3096
3097 MachineFunction &MF = DAG.getMachineFunction();
3098 const Function &CallerF = MF.getFunction();
3099 CallingConv::ID CallerCC = CallerF.getCallingConv();
3100 bool CCMatch = CallerCC == CalleeCC;
3101
3102 // Byval parameters hand the function a pointer directly into the stack area
3103 // we want to reuse during a tail call. Working around this *is* possible (see
3104 // X86) but less efficient and uglier in LowerCall.
3105 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3106 e = CallerF.arg_end();
3107 i != e; ++i)
3108 if (i->hasByValAttr())
3109 return false;
3110
3111 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3112 return canGuaranteeTCO(CalleeCC) && CCMatch;
3113
3114 // Externally-defined functions with weak linkage should not be
3115 // tail-called on AArch64 when the OS does not support dynamic
3116 // pre-emption of symbols, as the AAELF spec requires normal calls
3117 // to undefined weak functions to be replaced with a NOP or jump to the
3118 // next instruction. The behaviour of branch instructions in this
3119 // situation (as used for tail calls) is implementation-defined, so we
3120 // cannot rely on the linker replacing the tail call with a return.
3121 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3122 const GlobalValue *GV = G->getGlobal();
3123 const Triple &TT = getTargetMachine().getTargetTriple();
3124 if (GV->hasExternalWeakLinkage() &&
3125 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3126 return false;
3127 }
3128
3129 // Now we search for cases where we can use a tail call without changing the
3130 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3131 // concept.
3132
3133 // I want anyone implementing a new calling convention to think long and hard
3134 // about this assert.
3135   assert((!isVarArg || CalleeCC == CallingConv::C) &&
3136          "Unexpected variadic calling convention");
3137
3138 LLVMContext &C = *DAG.getContext();
3139 if (isVarArg && !Outs.empty()) {
3140 // At least two cases here: if caller is fastcc then we can't have any
3141 // memory arguments (we'd be expected to clean up the stack afterwards). If
3142 // caller is C then we could potentially use its argument area.
3143
3144 // FIXME: for now we take the most conservative of these in both cases:
3145 // disallow all variadic memory operands.
3146 SmallVector<CCValAssign, 16> ArgLocs;
3147 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3148
3149 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3150 for (const CCValAssign &ArgLoc : ArgLocs)
3151 if (!ArgLoc.isRegLoc())
3152 return false;
3153 }
3154
3155 // Check that the call results are passed in the same way.
3156 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3157 CCAssignFnForCall(CalleeCC, isVarArg),
3158 CCAssignFnForCall(CallerCC, isVarArg)))
3159 return false;
3160 // The callee has to preserve all registers the caller needs to preserve.
3161 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3162 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3163 if (!CCMatch) {
3164 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3165 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3166 return false;
3167 }
3168
3169 // Nothing more to check if the callee is taking no arguments
3170 if (Outs.empty())
3171 return true;
3172
3173 SmallVector<CCValAssign, 16> ArgLocs;
3174 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3175
3176 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3177
3178 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3179
3180 // If the stack arguments for this call do not fit into our own save area then
3181 // the call cannot be made tail.
3182 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3183 return false;
3184
3185 const MachineRegisterInfo &MRI = MF.getRegInfo();
3186 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3187 return false;
3188
3189 return true;
3190}
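// Example of the sibcall constraint checked above: if the callee needs 32 bytes
// of stack-passed arguments but the caller's own incoming argument area
// (getBytesInStackArgArea()) is only 16 bytes, the call is not tail-call
// eligible because the outgoing arguments would not fit in the reusable space.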
3191
3192SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3193 SelectionDAG &DAG,
3194 MachineFrameInfo &MFI,
3195 int ClobberedFI) const {
3196 SmallVector<SDValue, 8> ArgChains;
3197 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3198 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3199
3200 // Include the original chain at the beginning of the list. When this is
3201 // used by target LowerCall hooks, this helps legalize find the
3202 // CALLSEQ_BEGIN node.
3203 ArgChains.push_back(Chain);
3204
3205   // Add a chain value for each stack-argument load that overlaps the clobbered object.
3206 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3207 UE = DAG.getEntryNode().getNode()->use_end();
3208 U != UE; ++U)
3209 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3210 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3211 if (FI->getIndex() < 0) {
3212 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3213 int64_t InLastByte = InFirstByte;
3214 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3215
3216 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3217 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3218 ArgChains.push_back(SDValue(L, 1));
3219 }
3220
3221 // Build a tokenfactor for all the chains.
3222 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3223}
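// Example of the overlap test above: if the tail call is about to clobber the
// caller's incoming stack object at bytes [0, 7] and a pending load reads the
// fixed object covering bytes [4, 11], that load's chain is added to the
// TokenFactor so it completes before the store that would clobber it.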
3224
3225bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3226 bool TailCallOpt) const {
3227 return CallCC == CallingConv::Fast && TailCallOpt;
3228}
3229
3230/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3231/// and add input and output parameter nodes.
3232SDValue
3233AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3234 SmallVectorImpl<SDValue> &InVals) const {
3235 SelectionDAG &DAG = CLI.DAG;
3236 SDLoc &DL = CLI.DL;
3237 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3238 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3239 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3240 SDValue Chain = CLI.Chain;
3241 SDValue Callee = CLI.Callee;
3242 bool &IsTailCall = CLI.IsTailCall;
3243 CallingConv::ID CallConv = CLI.CallConv;
3244 bool IsVarArg = CLI.IsVarArg;
3245
3246 MachineFunction &MF = DAG.getMachineFunction();
3247 bool IsThisReturn = false;
3248
3249 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3250 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3251 bool IsSibCall = false;
3252
3253 if (IsTailCall) {
3254 // Check if it's really possible to do a tail call.
3255 IsTailCall = isEligibleForTailCallOptimization(
3256 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3257 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3258 report_fatal_error("failed to perform tail call elimination on a call "
3259 "site marked musttail");
3260
3261 // A sibling call is one where we're under the usual C ABI and not planning
3262 // to change that but can still do a tail call:
3263 if (!TailCallOpt && IsTailCall)
3264 IsSibCall = true;
3265
3266 if (IsTailCall)
3267 ++NumTailCalls;
3268 }
3269
3270 // Analyze operands of the call, assigning locations to each operand.
3271 SmallVector<CCValAssign, 16> ArgLocs;
3272 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3273 *DAG.getContext());
3274
3275 if (IsVarArg) {
3276 // Handle fixed and variable vector arguments differently.
3277 // Variable vector arguments always go into memory.
3278 unsigned NumArgs = Outs.size();
3279
3280 for (unsigned i = 0; i != NumArgs; ++i) {
3281 MVT ArgVT = Outs[i].VT;
3282 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3283 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3284 /*IsVarArg=*/ !Outs[i].IsFixed);
3285 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3286       assert(!Res && "Call operand has unhandled type");
3287 (void)Res;
3288 }
3289 } else {
3290 // At this point, Outs[].VT may already be promoted to i32. To correctly
3291 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3292 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3293 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3294 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3295 // LocVT.
3296 unsigned NumArgs = Outs.size();
3297 for (unsigned i = 0; i != NumArgs; ++i) {
3298 MVT ValVT = Outs[i].VT;
3299 // Get type of the original argument.
3300 EVT ActualVT = getValueType(DAG.getDataLayout(),
3301 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3302 /*AllowUnknown*/ true);
3303 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3304 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3305 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3306 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3307 ValVT = MVT::i8;
3308 else if (ActualMVT == MVT::i16)
3309 ValVT = MVT::i16;
3310
3311 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3312 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3313       assert(!Res && "Call operand has unhandled type");
3314 (void)Res;
3315 }
3316 }
3317
3318 // Get a count of how many bytes are to be pushed on the stack.
3319 unsigned NumBytes = CCInfo.getNextStackOffset();
3320
3321 if (IsSibCall) {
3322 // Since we're not changing the ABI to make this a tail call, the memory
3323 // operands are already available in the caller's incoming argument space.
3324 NumBytes = 0;
3325 }
3326
3327 // FPDiff is the byte offset of the call's argument area from the callee's.
3328 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3329 // by this amount for a tail call. In a sibling call it must be 0 because the
3330 // caller will deallocate the entire stack and the callee still expects its
3331 // arguments to begin at SP+0. Completely unused for non-tail calls.
3332 int FPDiff = 0;
3333
3334 if (IsTailCall && !IsSibCall) {
3335 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3336
3337 // Since callee will pop argument stack as a tail call, we must keep the
3338 // popped size 16-byte aligned.
3339 NumBytes = alignTo(NumBytes, 16);
3340
3341 // FPDiff will be negative if this tail call requires more space than we
3342 // would automatically have in our incoming argument space. Positive if we
3343 // can actually shrink the stack.
3344 FPDiff = NumReusableBytes - NumBytes;
3345
3346 // The stack pointer must be 16-byte aligned at all times it's used for a
3347 // memory operation, which in practice means at *all* times and in
3348 // particular across call boundaries. Therefore our own arguments started at
3349 // a 16-byte aligned SP and the delta applied for the tail call should
3350 // satisfy the same constraint.
3351     assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3352 }
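  // Worked example: if the caller's reusable incoming-argument area is 32 bytes
  // (NumReusableBytes) and this tail call needs 48 bytes after 16-byte rounding,
  // FPDiff == -16, i.e. the call needs 16 more bytes of argument space than the
  // caller's frame already provides.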
3353
3354 // Adjust the stack pointer for the new arguments...
3355 // These operations are automatically eliminated by the prolog/epilog pass
3356 if (!IsSibCall)
3357 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3358
3359 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3360 getPointerTy(DAG.getDataLayout()));
3361
3362 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3363 SmallVector<SDValue, 8> MemOpChains;
3364 auto PtrVT = getPointerTy(DAG.getDataLayout());
3365
3366 // Walk the register/memloc assignments, inserting copies/loads.
3367 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3368 ++i, ++realArgIdx) {
3369 CCValAssign &VA = ArgLocs[i];
3370 SDValue Arg = OutVals[realArgIdx];
3371 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3372
3373 // Promote the value if needed.
3374 switch (VA.getLocInfo()) {
3375 default:
3376       llvm_unreachable("Unknown loc info!");
3377 case CCValAssign::Full:
3378 break;
3379 case CCValAssign::SExt:
3380 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3381 break;
3382 case CCValAssign::ZExt:
3383 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3384 break;
3385 case CCValAssign::AExt:
3386 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3387 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3388 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3389 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3390 }
3391 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3392 break;
3393 case CCValAssign::BCvt:
3394 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3395 break;
3396 case CCValAssign::FPExt:
3397 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3398 break;
3399 }
3400
3401 if (VA.isRegLoc()) {
3402 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3403 Outs[0].VT == MVT::i64) {
3404       assert(VA.getLocVT() == MVT::i64 &&
3405              "unexpected calling convention register assignment");
3406       assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3407              "unexpected use of 'returned'");
3408 IsThisReturn = true;
3409 }
3410 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3411 } else {
3412       assert(VA.isMemLoc());
3413
3414 SDValue DstAddr;
3415 MachinePointerInfo DstInfo;
3416
3417 // FIXME: This works on big-endian for composite byvals, which are the
3418 // common case. It should also work for fundamental types too.
3419 uint32_t BEAlign = 0;
3420 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3421 : VA.getValVT().getSizeInBits();
3422 OpSize = (OpSize + 7) / 8;
3423 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3424 !Flags.isInConsecutiveRegs()) {
3425 if (OpSize < 8)
3426 BEAlign = 8 - OpSize;
3427 }
3428 unsigned LocMemOffset = VA.getLocMemOffset();
3429 int32_t Offset = LocMemOffset + BEAlign;
3430 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3431 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3432
3433 if (IsTailCall) {
3434 Offset = Offset + FPDiff;
3435 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3436
3437 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3438 DstInfo =
3439 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3440
3441 // Make sure any stack arguments overlapping with where we're storing
3442 // are loaded before this eventual operation. Otherwise they'll be
3443 // clobbered.
3444 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3445 } else {
3446 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3447
3448 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3449 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3450 LocMemOffset);
3451 }
3452
3453 if (Outs[i].Flags.isByVal()) {
3454 SDValue SizeNode =
3455 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3456 SDValue Cpy = DAG.getMemcpy(
3457 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3458 /*isVol = */ false, /*AlwaysInline = */ false,
3459 /*isTailCall = */ false,
3460 DstInfo, MachinePointerInfo());
3461
3462 MemOpChains.push_back(Cpy);
3463 } else {
3464 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3465 // promoted to a legal register type i32, we should truncate Arg back to
3466 // i1/i8/i16.
3467 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3468 VA.getValVT() == MVT::i16)
3469 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3470
3471 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3472 MemOpChains.push_back(Store);
3473 }
3474 }
3475 }
3476
3477 if (!MemOpChains.empty())
3478 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3479
3480 // Build a sequence of copy-to-reg nodes chained together with token chain
3481 // and flag operands which copy the outgoing args into the appropriate regs.
3482 SDValue InFlag;
3483 for (auto &RegToPass : RegsToPass) {
3484 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3485 RegToPass.second, InFlag);
3486 InFlag = Chain.getValue(1);
3487 }
3488
3489 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3490 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3491 // node so that legalize doesn't hack it.
3492 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3493 auto GV = G->getGlobal();
3494 if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3495 AArch64II::MO_GOT) {
3496 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3497 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3498 } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3499       assert(Subtarget->isTargetWindows() &&
3500              "Windows is the only supported COFF target");
3501 Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3502 } else {
3503 const GlobalValue *GV = G->getGlobal();
3504 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3505 }
3506 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3507 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3508 Subtarget->isTargetMachO()) {
3509 const char *Sym = S->getSymbol();
3510 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3511 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3512 } else {
3513 const char *Sym = S->getSymbol();
3514 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3515 }
3516 }
3517
3518   // We don't usually want to end the call-sequence here because we would tidy
3519   // the frame up *after* the call; however, in the ABI-changing tail-call case
3520   // we've carefully laid out the parameters so that when SP is reset they'll be
3521   // in the correct location.
3522 if (IsTailCall && !IsSibCall) {
3523 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3524 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3525 InFlag = Chain.getValue(1);
3526 }
3527
3528 std::vector<SDValue> Ops;
3529 Ops.push_back(Chain);
3530 Ops.push_back(Callee);
3531
3532 if (IsTailCall) {
3533 // Each tail call may have to adjust the stack by a different amount, so
3534 // this information must travel along with the operation for eventual
3535 // consumption by emitEpilogue.
3536 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3537 }
3538
3539 // Add argument registers to the end of the list so that they are known live
3540 // into the call.
3541 for (auto &RegToPass : RegsToPass)
3542 Ops.push_back(DAG.getRegister(RegToPass.first,
3543 RegToPass.second.getValueType()));
3544
3545 // Add a register mask operand representing the call-preserved registers.
3546 const uint32_t *Mask;
3547 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3548 if (IsThisReturn) {
3549 // For 'this' returns, use the X0-preserving mask if applicable
3550 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3551 if (!Mask) {
3552 IsThisReturn = false;
3553 Mask = TRI->getCallPreservedMask(MF, CallConv);
3554 }
3555 } else
3556 Mask = TRI->getCallPreservedMask(MF, CallConv);
3557
3558   assert(Mask && "Missing call preserved mask for calling convention");
3559 Ops.push_back(DAG.getRegisterMask(Mask));
3560
3561 if (InFlag.getNode())
3562 Ops.push_back(InFlag);
3563
3564 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3565
3566   // If we're doing a tail call, use a TC_RETURN here rather than an
3567 // actual call instruction.
3568 if (IsTailCall) {
3569 MF.getFrameInfo().setHasTailCall();
3570 return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3571 }
3572
3573 // Returns a chain and a flag for retval copy to use.
3574 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3575 InFlag = Chain.getValue(1);
3576
3577 uint64_t CalleePopBytes =
3578 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3579
3580 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3581 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3582 InFlag, DL);
3583 if (!Ins.empty())
3584 InFlag = Chain.getValue(1);
3585
3586 // Handle result values, copying them out of physregs into vregs that we
3587 // return.
3588 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3589 InVals, IsThisReturn,
3590 IsThisReturn ? OutVals[0] : SDValue());
3591}
3592
3593bool AArch64TargetLowering::CanLowerReturn(
3594 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3595 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3596 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3597 ? RetCC_AArch64_WebKit_JS
3598 : RetCC_AArch64_AAPCS;
3599 SmallVector<CCValAssign, 16> RVLocs;
3600 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3601 return CCInfo.CheckReturn(Outs, RetCC);
3602}
3603
3604SDValue
3605AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3606 bool isVarArg,
3607 const SmallVectorImpl<ISD::OutputArg> &Outs,
3608 const SmallVectorImpl<SDValue> &OutVals,
3609 const SDLoc &DL, SelectionDAG &DAG) const {
3610 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3611 ? RetCC_AArch64_WebKit_JS
3612 : RetCC_AArch64_AAPCS;
3613 SmallVector<CCValAssign, 16> RVLocs;
3614 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3615 *DAG.getContext());
3616 CCInfo.AnalyzeReturn(Outs, RetCC);
3617
3618 // Copy the result values into the output registers.
3619 SDValue Flag;
3620 SmallVector<SDValue, 4> RetOps(1, Chain);
3621 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3622 ++i, ++realRVLocIdx) {
3623 CCValAssign &VA = RVLocs[i];
3624     assert(VA.isRegLoc() && "Can only return in registers!");
3625 SDValue Arg = OutVals[realRVLocIdx];
3626
3627 switch (VA.getLocInfo()) {
3628 default:
3629       llvm_unreachable("Unknown loc info!");
3630 case CCValAssign::Full:
3631 if (Outs[i].ArgVT == MVT::i1) {
3632 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3633 // value. This is strictly redundant on Darwin (which uses "zeroext
3634 // i1"), but will be optimised out before ISel.
3635 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3636 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3637 }
3638 break;
3639 case CCValAssign::BCvt:
3640 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3641 break;
3642 }
3643
3644 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3645 Flag = Chain.getValue(1);
3646 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3647 }
3648 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3649 const MCPhysReg *I =
3650 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3651 if (I) {
3652 for (; *I; ++I) {
3653 if (AArch64::GPR64RegClass.contains(*I))
3654 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3655 else if (AArch64::FPR64RegClass.contains(*I))
3656 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3657 else
3658         llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3659 }
3660 }
3661
3662 RetOps[0] = Chain; // Update chain.
3663
3664 // Add the flag if we have it.
3665 if (Flag.getNode())
3666 RetOps.push_back(Flag);
3667
3668 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3669}
3670
3671//===----------------------------------------------------------------------===//
3672// Other Lowering Code
3673//===----------------------------------------------------------------------===//
3674
3675SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3676 SelectionDAG &DAG,
3677 unsigned Flag) const {
3678 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
3679 N->getOffset(), Flag);
3680}
3681
3682SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3683 SelectionDAG &DAG,
3684 unsigned Flag) const {
3685 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3686}
3687
3688SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3689 SelectionDAG &DAG,
3690 unsigned Flag) const {
3691 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3692 N->getOffset(), Flag);
3693}
3694
3695SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
3696 SelectionDAG &DAG,
3697 unsigned Flag) const {
3698 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
3699}
3700
3701// (loadGOT sym)
3702template <class NodeTy>
3703SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
3704 unsigned Flags) const {
3705  DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
3706 SDLoc DL(N);
3707 EVT Ty = getPointerTy(DAG.getDataLayout());
3708 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
3709 // FIXME: Once remat is capable of dealing with instructions with register
3710 // operands, expand this into two nodes instead of using a wrapper node.
3711 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
3712}
3713
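// For illustration (a sketch, not part of the file under analysis): under the
// small code model the LOADgot wrapper built above is ultimately selected into
// a two-instruction GOT access such as
//   adrp x0, :got:sym
//   ldr  x0, [x0, :got_lo12:sym]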
3714// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
3715template <class NodeTy>
3716SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
3717 unsigned Flags) const {
3718  DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
3719 SDLoc DL(N);
3720 EVT Ty = getPointerTy(DAG.getDataLayout());
3721 const unsigned char MO_NC = AArch64II::MO_NC;
3722 return DAG.getNode(
3723 AArch64ISD::WrapperLarge, DL, Ty,
3724 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
3725 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
3726 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
3727 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
3728}
3729
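// For illustration (a sketch): the four MO_G3..MO_G0 fragments above
// correspond to the large-code-model movz/movk materialization, e.g.
//   movz x0, #:abs_g3:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g0_nc:sym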
3730// (addlow (adrp %hi(sym)) %lo(sym))
3731template <class NodeTy>
3732SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3733 unsigned Flags) const {
3734  DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
3735 SDLoc DL(N);
3736 EVT Ty = getPointerTy(DAG.getDataLayout());
3737 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
3738 SDValue Lo = getTargetNode(N, Ty, DAG,
3739 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
3740 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
3741 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
3742}
3743
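// For illustration (a sketch): the ADRP/ADDlow pair built above becomes the
// usual small-code-model address materialization
//   adrp x0, sym
//   add  x0, x0, :lo12:sym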
3744SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
3745 SelectionDAG &DAG) const {
3746 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
3747 const GlobalValue *GV = GN->getGlobal();
3748 const AArch64II::TOF TargetFlags =
3749 (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
3750 : AArch64II::MO_NO_FLAG);
3751 unsigned char OpFlags =
3752 Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
3753 if (OpFlags != AArch64II::MO_NO_FLAG)
3754    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
3755           "unexpected offset in global node");
3756
3757 // This also catches the large code model case for Darwin.
3758 if ((OpFlags & AArch64II::MO_GOT) != 0) {
3759 return getGOT(GN, DAG, TargetFlags);
3760 }
3761
3762 SDValue Result;
3763 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3764 Result = getAddrLarge(GN, DAG, TargetFlags);
3765 } else {
3766 Result = getAddr(GN, DAG, TargetFlags);
3767 }
3768 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3769 SDLoc DL(GN);
3770 if (GV->hasDLLImportStorageClass())
3771 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3772 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3773 return Result;
3774}
3775
3776/// \brief Convert a TLS address reference into the correct sequence of loads
3777/// and calls to compute the variable's address (for Darwin, currently) and
3778/// return an SDValue containing the final node.
3779
3780/// Darwin only has one TLS scheme which must be capable of dealing with the
3781/// fully general situation, in the worst case. This means:
3782/// + "extern __thread" declaration.
3783/// + Defined in a possibly unknown dynamic library.
3784///
3785/// The general system is that each __thread variable has a [3 x i64] descriptor
3786/// which contains information used by the runtime to calculate the address. The
3787/// only part of this the compiler needs to know about is the first xword, which
3788/// contains a function pointer that must be called with the address of the
3789/// entire descriptor in "x0".
3790///
3791/// Since this descriptor may be in a different unit, in general even the
3792/// descriptor must be accessed via an indirect load. The "ideal" code sequence
3793/// is:
3794/// adrp x0, _var@TLVPPAGE
3795/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
3796/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
3797/// ; the function pointer
3798/// blr x1 ; Uses descriptor address in x0
3799/// ; Address of _var is now in x0.
3800///
3801/// If the address of _var's descriptor *is* known to the linker, then it can
3802/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
3803/// a slight efficiency gain.
3804SDValue
3805AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
3806 SelectionDAG &DAG) const {
3807  assert(Subtarget->isTargetDarwin() &&
3808         "This function expects a Darwin target");
3809
3810 SDLoc DL(Op);
3811 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3812 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3813
3814 SDValue TLVPAddr =
3815 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3816 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
3817
3818 // The first entry in the descriptor is a function pointer that we must call
3819 // to obtain the address of the variable.
3820 SDValue Chain = DAG.getEntryNode();
3821 SDValue FuncTLVGet = DAG.getLoad(
3822 MVT::i64, DL, Chain, DescAddr,
3823 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3824 /* Alignment = */ 8,
3825 MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
3826 MachineMemOperand::MODereferenceable);
3827 Chain = FuncTLVGet.getValue(1);
3828
3829 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
3830 MFI.setAdjustsStack(true);
3831
3832 // TLS calls preserve all registers except those that absolutely must be
3833 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3834 // silly).
3835 const uint32_t *Mask =
3836 Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
3837
3838 // Finally, we can make the call. This is just a degenerate version of a
3839 // normal AArch64 call node: x0 takes the address of the descriptor, and
3840 // returns the address of the variable in this thread.
3841 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
3842 Chain =
3843 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3844 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
3845 DAG.getRegisterMask(Mask), Chain.getValue(1));
3846 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
3847}
3848
3849/// When accessing thread-local variables under either the general-dynamic or
3850/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
3851/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
3852/// is a function pointer to carry out the resolution.
3853///
3854/// The sequence is:
3855/// adrp x0, :tlsdesc:var
3856/// ldr x1, [x0, #:tlsdesc_lo12:var]
3857/// add x0, x0, #:tlsdesc_lo12:var
3858/// .tlsdesccall var
3859/// blr x1
3860/// (TPIDR_EL0 offset now in x0)
3861///
3862/// The above sequence must be produced unscheduled, to enable the linker to
3863/// optimize/relax this sequence.
3864/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
3865/// above sequence, and expanded really late in the compilation flow, to ensure
3866/// the sequence is produced as per above.
3867SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
3868 const SDLoc &DL,
3869 SelectionDAG &DAG) const {
3870 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3871
3872 SDValue Chain = DAG.getEntryNode();
3873 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3874
3875 Chain =
3876 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
3877 SDValue Glue = Chain.getValue(1);
3878
3879 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
3880}
3881
3882SDValue
3883AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
3884 SelectionDAG &DAG) const {
3885  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
3886  assert(Subtarget->useSmallAddressing() &&
3887         "ELF TLS only supported in small memory model");
3888 // Different choices can be made for the maximum size of the TLS area for a
3889 // module. For the small address model, the default TLS size is 16MiB and the
3890 // maximum TLS size is 4GiB.
3891 // FIXME: add -mtls-size command line option and make it control the 16MiB
3892 // vs. 4GiB code sequence generation.
3893 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3894
3895 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
3896
3897 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
3898 if (Model == TLSModel::LocalDynamic)
3899 Model = TLSModel::GeneralDynamic;
3900 }
3901
3902 SDValue TPOff;
3903 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3904 SDLoc DL(Op);
3905 const GlobalValue *GV = GA->getGlobal();
3906
3907 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
3908
3909 if (Model == TLSModel::LocalExec) {
3910 SDValue HiVar = DAG.getTargetGlobalAddress(
3911 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3912 SDValue LoVar = DAG.getTargetGlobalAddress(
3913 GV, DL, PtrVT, 0,
3914 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3915
3916 SDValue TPWithOff_lo =
3917 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
3918 HiVar,
3919 DAG.getTargetConstant(0, DL, MVT::i32)),
3920 0);
3921 SDValue TPWithOff =
3922 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
3923 LoVar,
3924 DAG.getTargetConstant(0, DL, MVT::i32)),
3925 0);
3926 return TPWithOff;
3927 } else if (Model == TLSModel::InitialExec) {
3928 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3929 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
3930 } else if (Model == TLSModel::LocalDynamic) {
3931    // Local-dynamic accesses proceed in two phases. First, a general-dynamic
3932    // TLS descriptor call against the special symbol _TLS_MODULE_BASE_
3933    // calculates the beginning of the module's TLS region; this is followed
3934    // by a DTPREL offset calculation.
3935
3936 // These accesses will need deduplicating if there's more than one.
3937 AArch64FunctionInfo *MFI =
3938 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
3939 MFI->incNumLocalDynamicTLSAccesses();
3940
3941 // The call needs a relocation too for linker relaxation. It doesn't make
3942 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3943 // the address.
3944 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
3945 AArch64II::MO_TLS);
3946
3947 // Now we can calculate the offset from TPIDR_EL0 to this module's
3948 // thread-local area.
3949 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3950
3951 // Now use :dtprel_whatever: operations to calculate this variable's offset
3952 // in its thread-storage area.
3953 SDValue HiVar = DAG.getTargetGlobalAddress(
3954 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3955 SDValue LoVar = DAG.getTargetGlobalAddress(
3956 GV, DL, MVT::i64, 0,
3957 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3958
3959 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
3960 DAG.getTargetConstant(0, DL, MVT::i32)),
3961 0);
3962 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
3963 DAG.getTargetConstant(0, DL, MVT::i32)),
3964 0);
3965 } else if (Model == TLSModel::GeneralDynamic) {
3966 // The call needs a relocation too for linker relaxation. It doesn't make
3967 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3968 // the address.
3969 SDValue SymAddr =
3970 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3971
3972 // Finally we can make a call to calculate the offset from tpidr_el0.
3973 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3974 } else
3975    llvm_unreachable("Unsupported ELF TLS access model");
3976
3977 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
3978}
3979
3980SDValue
3981AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
3982 SelectionDAG &DAG) const {
3983  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
3984
3985 SDValue Chain = DAG.getEntryNode();
3986 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3987 SDLoc DL(Op);
3988
3989 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
3990
3991 // Load the ThreadLocalStoragePointer from the TEB
3992 // A pointer to the TLS array is located at offset 0x58 from the TEB.
3993 SDValue TLSArray =
3994 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
3995 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
3996 Chain = TLSArray.getValue(1);
3997
3998  // Load the TLS index from the C runtime:
3999 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4000 // This also does the same as LOADgot, but using a generic i32 load,
4001 // while LOADgot only loads i64.
4002 SDValue TLSIndexHi =
4003 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4004 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4005 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4006 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4007 SDValue TLSIndex =
4008 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4009 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4010 Chain = TLSIndex.getValue(1);
4011
4012  // The pointer to this thread's TLS data area is found at the TLS index,
4013  // scaled by 8, as an offset into the TLSArray.
4014 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4015 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4016 DAG.getConstant(3, DL, PtrVT));
4017 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4018 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4019 MachinePointerInfo());
4020 Chain = TLS.getValue(1);
4021
4022 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4023 const GlobalValue *GV = GA->getGlobal();
4024 SDValue TGAHi = DAG.getTargetGlobalAddress(
4025 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4026 SDValue TGALo = DAG.getTargetGlobalAddress(
4027 GV, DL, PtrVT, 0,
4028 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4029
4030 // Add the offset from the start of the .tls section (section base).
4031 SDValue Addr =
4032 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4033 DAG.getTargetConstant(0, DL, MVT::i32)),
4034 0);
4035 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4036 return Addr;
4037}
4038
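// Putting the Windows sequence above together, the computed address is
// roughly (a pseudocode sketch, with the TEB in x18 as assumed by the
// lowering):
//   tls_base = ((char **)(*(char **)(TEB + 0x58)))[_tls_index];
//   addr     = tls_base + offset_of_var_within_the_tls_section;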
4039SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
4040 SelectionDAG &DAG) const {
4041 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4042 if (DAG.getTarget().useEmulatedTLS())
4043 return LowerToTLSEmulatedModel(GA, DAG);
4044
4045 if (Subtarget->isTargetDarwin())
4046 return LowerDarwinGlobalTLSAddress(Op, DAG);
4047 if (Subtarget->isTargetELF())
4048 return LowerELFGlobalTLSAddress(Op, DAG);
4049 if (Subtarget->isTargetWindows())
4050 return LowerWindowsGlobalTLSAddress(Op, DAG);
4051
4052  llvm_unreachable("Unexpected platform trying to use TLS");
4053}
4054
4055SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4056 SDValue Chain = Op.getOperand(0);
4057 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4058 SDValue LHS = Op.getOperand(2);
4059 SDValue RHS = Op.getOperand(3);
4060 SDValue Dest = Op.getOperand(4);
4061 SDLoc dl(Op);
4062
4063 // Handle f128 first, since lowering it will result in comparing the return
4064 // value of a libcall against zero, which is just what the rest of LowerBR_CC
4065 // is expecting to deal with.
4066 if (LHS.getValueType() == MVT::f128) {
4067 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4068
4069 // If softenSetCCOperands returned a scalar, we need to compare the result
4070 // against zero to select between true and false values.
4071 if (!RHS.getNode()) {
4072 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4073 CC = ISD::SETNE;
4074 }
4075 }
4076
4077 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4078 // instruction.
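  // For example (an illustrative sketch; register names are hypothetical):
  // branching on the overflow result of llvm.sadd.with.overflow.i32 can become
  //   adds w8, w0, w1
  //   b.vs dest        // or b.vc for the inverted SETNE case
  // with no separate compare instruction.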
4079 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4080 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4081 // Only lower legal XALUO ops.
4082 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4083 return SDValue();
4084
4085 // The actual operation with overflow check.
4086 AArch64CC::CondCode OFCC;
4087 SDValue Value, Overflow;
4088 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4089
4090 if (CC == ISD::SETNE)
4091 OFCC = getInvertedCondCode(OFCC);
4092 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4093
4094 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4095 Overflow);
4096 }
4097
4098 if (LHS.getValueType().isInteger()) {
4099    assert((LHS.getValueType() == RHS.getValueType()) &&
4100           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4101
4102 // If the RHS of the comparison is zero, we can potentially fold this
4103 // to a specialized branch.
4104 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4105 if (RHSC && RHSC->getZExtValue() == 0) {
4106 if (CC == ISD::SETEQ) {
4107 // See if we can use a TBZ to fold in an AND as well.
4108 // TBZ has a smaller branch displacement than CBZ. If the offset is
4109 // out of bounds, a late MI-layer pass rewrites branches.
4110 // 403.gcc is an example that hits this case.
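        // For example (hypothetical values): "br ((x & 4) == 0), dest" can be
        // selected as "tbz x0, #2, dest" instead of an AND followed by CBZ.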
4111 if (LHS.getOpcode() == ISD::AND &&
4112 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4113 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4114 SDValue Test = LHS.getOperand(0);
4115 uint64_t Mask = LHS.getConstantOperandVal(1);
4116 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4117 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4118 Dest);
4119 }
4120
4121 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4122 } else if (CC == ISD::SETNE) {
4123 // See if we can use a TBZ to fold in an AND as well.
4124 // TBZ has a smaller branch displacement than CBZ. If the offset is
4125 // out of bounds, a late MI-layer pass rewrites branches.
4126 // 403.gcc is an example that hits this case.
4127 if (LHS.getOpcode() == ISD::AND &&
4128 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4129 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4130 SDValue Test = LHS.getOperand(0);
4131 uint64_t Mask = LHS.getConstantOperandVal(1);
4132 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4133 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4134 Dest);
4135 }
4136
4137 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4138 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4139 // Don't combine AND since emitComparison converts the AND to an ANDS
4140 // (a.k.a. TST) and the test in the test bit and branch instruction
4141 // becomes redundant. This would also increase register pressure.
4142 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4143 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4144 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4145 }
4146 }
4147 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4148 LHS.getOpcode() != ISD::AND) {
4149 // Don't combine AND since emitComparison converts the AND to an ANDS
4150 // (a.k.a. TST) and the test in the test bit and branch instruction
4151 // becomes redundant. This would also increase register pressure.
4152 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4153 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4154 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4155 }
4156
4157 SDValue CCVal;
4158 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4159 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4160 Cmp);
4161 }
4162
4163  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4164         LHS.getValueType() == MVT::f64);
4165
4166 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4167 // clean. Some of them require two branches to implement.
4168 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4169 AArch64CC::CondCode CC1, CC2;
4170 changeFPCCToAArch64CC(CC, CC1, CC2);
4171 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4172 SDValue BR1 =
4173 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4174 if (CC2 != AArch64CC::AL) {
4175 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4176 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4177 Cmp);
4178 }
4179
4180 return BR1;
4181}
4182
4183SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4184 SelectionDAG &DAG) const {
4185 EVT VT = Op.getValueType();
4186 SDLoc DL(Op);
4187
4188 SDValue In1 = Op.getOperand(0);
4189 SDValue In2 = Op.getOperand(1);
4190 EVT SrcVT = In2.getValueType();
4191
4192 if (SrcVT.bitsLT(VT))
4193 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4194 else if (SrcVT.bitsGT(VT))
4195 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4196
4197 EVT VecVT;
4198 uint64_t EltMask;
4199 SDValue VecVal1, VecVal2;
4200
4201 auto setVecVal = [&] (int Idx) {
4202 if (!VT.isVector()) {
4203 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4204 DAG.getUNDEF(VecVT), In1);
4205 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4206 DAG.getUNDEF(VecVT), In2);
4207 } else {
4208 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4209 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4210 }
4211 };
4212
4213 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4214 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4215 EltMask = 0x80000000ULL;
4216 setVecVal(AArch64::ssub);
4217 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4218 VecVT = MVT::v2i64;
4219
4220 // We want to materialize a mask with the high bit set, but the AdvSIMD
4221 // immediate moves cannot materialize that in a single instruction for
4222 // 64-bit elements. Instead, materialize zero and then negate it.
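    // (FNEG of +0.0 flips only the sign bit, so the negated zero vector ends
    // up holding 0x8000000000000000 in each 64-bit lane, which is exactly the
    // sign mask we need.)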
4223 EltMask = 0;
4224
4225 setVecVal(AArch64::dsub);
4226 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4227 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4228 EltMask = 0x8000ULL;
4229 setVecVal(AArch64::hsub);
4230 } else {
4231    llvm_unreachable("Invalid type for copysign!");
4232 }
4233
4234 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4235
4236 // If we couldn't materialize the mask above, then the mask vector will be
4237 // the zero vector, and we need to negate it here.
4238 if (VT == MVT::f64 || VT == MVT::v2f64) {
4239 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4240 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4241 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4242 }
4243
4244 SDValue Sel =
4245 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4246
4247 if (VT == MVT::f16)
4248 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4249 if (VT == MVT::f32)
4250 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4251 else if (VT == MVT::f64)
4252 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4253 else
4254 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4255}
4256
4257SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4258 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
4259 Attribute::NoImplicitFloat))
4260 return SDValue();
4261
4262 if (!Subtarget->hasNEON())
4263 return SDValue();
4264
4265 // While there is no integer popcount instruction, it can
4266 // be more efficiently lowered to the following sequence that uses
4267 // AdvSIMD registers/instructions as long as the copies to/from
4268 // the AdvSIMD registers are cheap.
4269 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4270 // CNT V0.8B, V0.8B // 8xbyte pop-counts
4271 // ADDV B0, V0.8B // sum 8xbyte pop-counts
4272 // UMOV X0, V0.B[0] // copy byte result back to integer reg
4273 SDValue Val = Op.getOperand(0);
4274 SDLoc DL(Op);
4275 EVT VT = Op.getValueType();
4276
4277 if (VT == MVT::i32)
4278 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4279 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4280
4281 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4282 SDValue UaddLV = DAG.getNode(
4283 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4284 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4285
4286 if (VT == MVT::i64)
4287 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4288 return UaddLV;
4289}
4290
4291SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4292
4293 if (Op.getValueType().isVector())
4294 return LowerVSETCC(Op, DAG);
4295
4296 SDValue LHS = Op.getOperand(0);
4297 SDValue RHS = Op.getOperand(1);
4298 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4299 SDLoc dl(Op);
4300
4301 // We chose ZeroOrOneBooleanContents, so use zero and one.
4302 EVT VT = Op.getValueType();
4303 SDValue TVal = DAG.getConstant(1, dl, VT);
4304 SDValue FVal = DAG.getConstant(0, dl, VT);
4305
4306 // Handle f128 first, since one possible outcome is a normal integer
4307 // comparison which gets picked up by the next if statement.
4308 if (LHS.getValueType() == MVT::f128) {
4309 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4310
4311 // If softenSetCCOperands returned a scalar, use it.
4312 if (!RHS.getNode()) {
4313      assert(LHS.getValueType() == Op.getValueType() &&
4314             "Unexpected setcc expansion!");
4315 return LHS;
4316 }
4317 }
4318
4319 if (LHS.getValueType().isInteger()) {
4320 SDValue CCVal;
4321 SDValue Cmp =
4322 getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4323
4324 // Note that we inverted the condition above, so we reverse the order of
4325 // the true and false operands here. This will allow the setcc to be
4326 // matched to a single CSINC instruction.
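    // For example (an illustrative sketch; register names are hypothetical),
    // "i32 %r = setcc eq i32 %a, %b" can then be selected as:
    //   cmp  w0, w1
    //   cset w2, eq        // cset is the csinc wzr, wzr alias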
4327 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4328 }
4329
4330 // Now we know we're dealing with FP values.
4331  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4332         LHS.getValueType() == MVT::f64);
4333
4334 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4335 // and do the comparison.
4336 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4337
4338 AArch64CC::CondCode CC1, CC2;
4339 changeFPCCToAArch64CC(CC, CC1, CC2);
4340 if (CC2 == AArch64CC::AL) {
4341 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4342 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4343
4344 // Note that we inverted the condition above, so we reverse the order of
4345 // the true and false operands here. This will allow the setcc to be
4346 // matched to a single CSINC instruction.
4347 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4348 } else {
4349 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4350 // totally clean. Some of them require two CSELs to implement. As is in
4351 // this case, we emit the first CSEL and then emit a second using the output
4352 // of the first as the RHS. We're effectively OR'ing the two CC's together.
4353
4354 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4355 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4356 SDValue CS1 =
4357 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4358
4359 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4360 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4361 }
4362}
4363
4364SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4365 SDValue RHS, SDValue TVal,
4366 SDValue FVal, const SDLoc &dl,
4367 SelectionDAG &DAG) const {
4368 // Handle f128 first, because it will result in a comparison of some RTLIB
4369 // call result against zero.
4370 if (LHS.getValueType() == MVT::f128) {
4371 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4372
4373 // If softenSetCCOperands returned a scalar, we need to compare the result
4374 // against zero to select between true and false values.
4375 if (!RHS.getNode()) {
4376 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4377 CC = ISD::SETNE;
4378 }
4379 }
4380
4381 // Also handle f16, for which we need to do a f32 comparison.
4382 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4383 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4384 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4385 }
4386
4387 // Next, handle integers.
4388 if (LHS.getValueType().isInteger()) {
4389    assert((LHS.getValueType() == RHS.getValueType()) &&
4390           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4391
4392 unsigned Opcode = AArch64ISD::CSEL;
4393
4394 // If both the TVal and the FVal are constants, see if we can swap them in
4395    // order to form a CSINV or CSINC out of them.
4396 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4397 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4398
4399 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4400 std::swap(TVal, FVal);
4401 std::swap(CTVal, CFVal);
4402 CC = ISD::getSetCCInverse(CC, true);
4403 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4404 std::swap(TVal, FVal);
4405 std::swap(CTVal, CFVal);
4406 CC = ISD::getSetCCInverse(CC, true);
4407 } else if (TVal.getOpcode() == ISD::XOR) {
4408 // If TVal is a NOT we want to swap TVal and FVal so that we can match
4409 // with a CSINV rather than a CSEL.
4410 if (isAllOnesConstant(TVal.getOperand(1))) {
4411 std::swap(TVal, FVal);
4412 std::swap(CTVal, CFVal);
4413 CC = ISD::getSetCCInverse(CC, true);
4414 }
4415 } else if (TVal.getOpcode() == ISD::SUB) {
4416 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4417 // that we can match with a CSNEG rather than a CSEL.
4418 if (isNullConstant(TVal.getOperand(0))) {
4419 std::swap(TVal, FVal);
4420 std::swap(CTVal, CFVal);
4421 CC = ISD::getSetCCInverse(CC, true);
4422 }
4423 } else if (CTVal && CFVal) {
4424 const int64_t TrueVal = CTVal->getSExtValue();
4425 const int64_t FalseVal = CFVal->getSExtValue();
4426 bool Swap = false;
4427
4428 // If both TVal and FVal are constants, see if FVal is the
4429 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4430 // instead of a CSEL in that case.
4431 if (TrueVal == ~FalseVal) {
4432 Opcode = AArch64ISD::CSINV;
4433 } else if (TrueVal == -FalseVal) {
4434 Opcode = AArch64ISD::CSNEG;
4435 } else if (TVal.getValueType() == MVT::i32) {
4436 // If our operands are only 32-bit wide, make sure we use 32-bit
4437 // arithmetic for the check whether we can use CSINC. This ensures that
4438 // the addition in the check will wrap around properly in case there is
4439 // an overflow (which would not be the case if we do the check with
4440 // 64-bit arithmetic).
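        // For example (hypothetical constants): TVal == INT32_MIN and
        // FVal == INT32_MAX. In 32-bit arithmetic FalseVal32 + 1 wraps to
        // 0x80000000 == TrueVal32, so a CSINC applies, whereas the
        // sign-extended 64-bit comparison below would miss it.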
4441 const uint32_t TrueVal32 = CTVal->getZExtValue();
4442 const uint32_t FalseVal32 = CFVal->getZExtValue();
4443
4444 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4445 Opcode = AArch64ISD::CSINC;
4446
4447 if (TrueVal32 > FalseVal32) {
4448 Swap = true;
4449 }
4450 }
4451 // 64-bit check whether we can use CSINC.
4452 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4453 Opcode = AArch64ISD::CSINC;
4454
4455 if (TrueVal > FalseVal) {
4456 Swap = true;
4457 }
4458 }
4459
4460 // Swap TVal and FVal if necessary.
4461 if (Swap) {
4462 std::swap(TVal, FVal);
4463 std::swap(CTVal, CFVal);
4464 CC = ISD::getSetCCInverse(CC, true);
4465 }
4466
4467 if (Opcode != AArch64ISD::CSEL) {
4468 // Drop FVal since we can get its value by simply inverting/negating
4469 // TVal.
4470 FVal = TVal;
4471 }
4472 }
4473
4474 // Avoid materializing a constant when possible by reusing a known value in
4475 // a register. However, don't perform this optimization if the known value
4476 // is one, zero or negative one in the case of a CSEL. We can always
4477 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4478 // FVal, respectively.
4479 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4480 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4481 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4482 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4483 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4484 // "a != C ? x : a" to avoid materializing C.
4485 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4486 TVal = LHS;
4487 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4488 FVal = LHS;
4489 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4490      assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
4491 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4492 // avoid materializing C.
4493 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4494 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4495 Opcode = AArch64ISD::CSINV;
4496 TVal = LHS;
4497 FVal = DAG.getConstant(0, dl, FVal.getValueType());
4498 }
4499 }
4500
4501 SDValue CCVal;
4502 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4503 EVT VT = TVal.getValueType();
4504 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4505 }
4506
4507 // Now we know we're dealing with FP values.
4508  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4509         LHS.getValueType() == MVT::f64);
4510  assert(LHS.getValueType() == RHS.getValueType());
4511 EVT VT = TVal.getValueType();
4512 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4513
4514 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4515 // clean. Some of them require two CSELs to implement.
4516 AArch64CC::CondCode CC1, CC2;
4517 changeFPCCToAArch64CC(CC, CC1, CC2);
4518
4519 if (DAG.getTarget().Options.UnsafeFPMath) {
4520 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4521 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4522 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4523 if (RHSVal && RHSVal->isZero()) {
4524 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4525 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4526
4527 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4528 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4529 TVal = LHS;
4530 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4531 CFVal && CFVal->isZero() &&
4532 FVal.getValueType() == LHS.getValueType())
4533 FVal = LHS;
4534 }
4535 }
4536
4537 // Emit first, and possibly only, CSEL.
4538 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4539 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4540
4541 // If we need a second CSEL, emit it, using the output of the first as the
4542 // RHS. We're effectively OR'ing the two CC's together.
4543 if (CC2 != AArch64CC::AL) {
4544 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4545 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4546 }
4547
4548 // Otherwise, return the output of the first CSEL.
4549 return CS1;
4550}
4551
4552SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4553 SelectionDAG &DAG) const {
4554 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4555 SDValue LHS = Op.getOperand(0);
4556 SDValue RHS = Op.getOperand(1);
4557 SDValue TVal = Op.getOperand(2);
4558 SDValue FVal = Op.getOperand(3);
4559 SDLoc DL(Op);
4560 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4561}
4562
4563SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4564 SelectionDAG &DAG) const {
4565 SDValue CCVal = Op->getOperand(0);
4566 SDValue TVal = Op->getOperand(1);
4567 SDValue FVal = Op->getOperand(2);
4568 SDLoc DL(Op);
4569
4570 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4571 // instruction.
4572 if (isOverflowIntrOpRes(CCVal)) {
4573 // Only lower legal XALUO ops.
4574 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4575 return SDValue();
4576
4577 AArch64CC::CondCode OFCC;
4578 SDValue Value, Overflow;
4579 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4580 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4581
4582 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4583 CCVal, Overflow);
4584 }
4585
4586 // Lower it the same way as we would lower a SELECT_CC node.
4587 ISD::CondCode CC;
4588 SDValue LHS, RHS;
4589 if (CCVal.getOpcode() == ISD::SETCC) {
4590 LHS = CCVal.getOperand(0);
4591 RHS = CCVal.getOperand(1);
4592 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4593 } else {
4594 LHS = CCVal;
4595 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4596 CC = ISD::SETNE;
4597 }
4598 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4599}
4600
4601SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4602 SelectionDAG &DAG) const {
4603  // Jump table entries are PC-relative offsets. No additional tweaking
4604 // is necessary here. Just get the address of the jump table.
4605 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4606
4607 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4608 !Subtarget->isTargetMachO()) {
4609 return getAddrLarge(JT, DAG);
4610 }
4611 return getAddr(JT, DAG);
4612}
4613
4614SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4615 SelectionDAG &DAG) const {
4616 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4617
4618 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4619 // Use the GOT for the large code model on iOS.
4620 if (Subtarget->isTargetMachO()) {
4621 return getGOT(CP, DAG);
4622 }
4623 return getAddrLarge(CP, DAG);
4624 } else {
4625 return getAddr(CP, DAG);
4626 }
4627}
4628
4629SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4630 SelectionDAG &DAG) const {
4631 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
4632 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4633 !Subtarget->isTargetMachO()) {
4634 return getAddrLarge(BA, DAG);
4635 } else {
4636 return getAddr(BA, DAG);
4637 }
4638}
4639
4640SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4641 SelectionDAG &DAG) const {
4642 AArch64FunctionInfo *FuncInfo =
4643 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4644
4645 SDLoc DL(Op);
4646 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4647 getPointerTy(DAG.getDataLayout()));
4648 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4649 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4650 MachinePointerInfo(SV));
4651}
4652
4653SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
4654 SelectionDAG &DAG) const {
4655 AArch64FunctionInfo *FuncInfo =
4656 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4657
4658 SDLoc DL(Op);
4659 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
4660 ? FuncInfo->getVarArgsGPRIndex()
4661 : FuncInfo->getVarArgsStackIndex(),
4662 getPointerTy(DAG.getDataLayout()));
4663 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4664 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4665 MachinePointerInfo(SV));
4666}
4667
4668SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
4669 SelectionDAG &DAG) const {
4670 // The layout of the va_list struct is specified in the AArch64 Procedure Call
4671 // Standard, section B.3.
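  // For reference, that layout is (a sketch of the AAPCS64 B.3 definition; the
  // field offsets match the stores emitted below):
  //   struct va_list {
  //     void *__stack;    // offset 0
  //     void *__gr_top;   // offset 8
  //     void *__vr_top;   // offset 16
  //     int   __gr_offs;  // offset 24
  //     int   __vr_offs;  // offset 28
  //   };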
4672 MachineFunction &MF = DAG.getMachineFunction();
4673 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4674 auto PtrVT = getPointerTy(DAG.getDataLayout());
4675 SDLoc DL(Op);
4676
4677 SDValue Chain = Op.getOperand(0);
4678 SDValue VAList = Op.getOperand(1);
4679 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4680 SmallVector<SDValue, 4> MemOps;
4681
4682 // void *__stack at offset 0
4683 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
4684 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
4685 MachinePointerInfo(SV), /* Alignment = */ 8));
4686
4687 // void *__gr_top at offset 8
4688 int GPRSize = FuncInfo->getVarArgsGPRSize();
4689 if (GPRSize > 0) {
4690 SDValue GRTop, GRTopAddr;
4691
4692 GRTopAddr =
4693 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
4694
4695 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
4696 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
4697 DAG.getConstant(GPRSize, DL, PtrVT));
4698
4699 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
4700 MachinePointerInfo(SV, 8),
4701 /* Alignment = */ 8));
4702 }
4703
4704 // void *__vr_top at offset 16
4705 int FPRSize = FuncInfo->getVarArgsFPRSize();
4706 if (FPRSize > 0) {
4707 SDValue VRTop, VRTopAddr;
4708 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4709 DAG.getConstant(16, DL, PtrVT));
4710
4711 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
4712 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
4713 DAG.getConstant(FPRSize, DL, PtrVT));
4714
4715 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
4716 MachinePointerInfo(SV, 16),
4717 /* Alignment = */ 8));
4718 }
4719
4720 // int __gr_offs at offset 24
4721 SDValue GROffsAddr =
4722 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
4723 MemOps.push_back(DAG.getStore(
4724 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
4725 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
4726
4727 // int __vr_offs at offset 28
4728 SDValue VROffsAddr =
4729 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
4730 MemOps.push_back(DAG.getStore(
4731 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
4732 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
4733
4734 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4735}
4736
4737SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
4738 SelectionDAG &DAG) const {
4739 MachineFunction &MF = DAG.getMachineFunction();
4740
4741 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
4742 return LowerWin64_VASTART(Op, DAG);
4743 else if (Subtarget->isTargetDarwin())
4744 return LowerDarwin_VASTART(Op, DAG);
4745 else
4746 return LowerAAPCS_VASTART(Op, DAG);
4747}
4748
4749SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
4750 SelectionDAG &DAG) const {
4751  // AAPCS has three pointers and two ints (= 32 bytes); Darwin and Windows
4752  // have a single pointer.
4753 SDLoc DL(Op);
4754 unsigned VaListSize =
4755 Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
4756 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4757 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4758
4759 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
4760 Op.getOperand(2),
4761 DAG.getConstant(VaListSize, DL, MVT::i32),
4762 8, false, false, false, MachinePointerInfo(DestSV),
4763 MachinePointerInfo(SrcSV));
4764}
4765
4766SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
4767  assert(Subtarget->isTargetDarwin() &&
4768         "automatic va_arg instruction only works on Darwin");
4769
4770 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4771 EVT VT = Op.getValueType();
4772 SDLoc DL(Op);
4773 SDValue Chain = Op.getOperand(0);
4774 SDValue Addr = Op.getOperand(1);
4775 unsigned Align = Op.getConstantOperandVal(3);
4776 auto PtrVT = getPointerTy(DAG.getDataLayout());
4777
4778 SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
4779 Chain = VAList.getValue(1);
4780
4781 if (Align > 8) {
4782    assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
4783 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4784 DAG.getConstant(Align - 1, DL, PtrVT));
4785 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
4786 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
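    // That is, VAList = (VAList + Align - 1) & ~(Align - 1), rounding the
    // pointer up to the requested alignment (e.g. with Align == 16, a pointer
    // ending in 0x28 is rounded up to one ending in 0x30).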
4787 }
4788
4789 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
4790 uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
4791
4792 // Scalar integer and FP values smaller than 64 bits are implicitly extended
4793 // up to 64 bits. At the very least, we have to increase the striding of the
4794 // vaargs list to match this, and for FP values we need to introduce
4795 // FP_ROUND nodes as well.
4796 if (VT.isInteger() && !VT.isVector())
4797 ArgSize = 8;
4798 bool NeedFPTrunc = false;
4799 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
4800 ArgSize = 8;
4801 NeedFPTrunc = true;
4802 }
4803
4804 // Increment the pointer, VAList, to the next vaarg
4805 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4806 DAG.getConstant(ArgSize, DL, PtrVT));
4807 // Store the incremented VAList to the legalized pointer
4808 SDValue APStore =
4809 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
4810
4811 // Load the actual argument out of the pointer VAList
4812 if (NeedFPTrunc) {
4813 // Load the value as an f64.
4814 SDValue WideFP =
4815 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
4816 // Round the value down to an f32.
4817 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
4818 DAG.getIntPtrConstant(1, DL));
4819 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
4820 // Merge the rounded value with the chain output of the load.
4821 return DAG.getMergeValues(Ops, DL);
4822 }
4823
4824 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
4825}
4826
4827SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
4828 SelectionDAG &DAG) const {
4829 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4830 MFI.setFrameAddressIsTaken(true);
4831
4832 EVT VT = Op.getValueType();
4833 SDLoc DL(Op);
4834 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4835 SDValue FrameAddr =
4836 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
4837 while (Depth--)
4838 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
4839 MachinePointerInfo());
4840 return FrameAddr;
4841}
4842
4843// FIXME? Maybe this could be a TableGen attribute on some registers and
4844// this table could be generated automatically from RegInfo.
4845unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
4846 SelectionDAG &DAG) const {
4847 unsigned Reg = StringSwitch<unsigned>(RegName)
4848 .Case("sp", AArch64::SP)
4849 .Case("x18", AArch64::X18)
4850 .Case("w18", AArch64::W18)
4851 .Default(0);
4852 if ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
4853 !Subtarget->isX18Reserved())
4854 Reg = 0;
4855 if (Reg)
4856 return Reg;
4857 report_fatal_error(Twine("Invalid register name \""
4858 + StringRef(RegName) + "\"."));
4859}
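// Editorial note (illustrative, not part of the original source): this hook
// resolves register names for the llvm.read_register / llvm.write_register
// intrinsics, e.g. a global named-register variable in C such as
//   register unsigned long current_sp asm("sp");
// Only "sp" is always accepted; "x18"/"w18" are accepted only when x18 is
// reserved on the subtarget, and any other name reaches the fatal error.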
4860
4861SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
4862 SelectionDAG &DAG) const {
4863 MachineFunction &MF = DAG.getMachineFunction();
4864 MachineFrameInfo &MFI = MF.getFrameInfo();
4865 MFI.setReturnAddressIsTaken(true);
4866
4867 EVT VT = Op.getValueType();
4868 SDLoc DL(Op);
4869 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4870 if (Depth) {
4871 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4872 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
4873 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
4874 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
4875 MachinePointerInfo());
4876 }
4877
4878 // Return LR, which contains the return address. Mark it an implicit live-in.
4879 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
4880 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
4881}
4882
4883/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
4884/// i64 values and take a 2 x i64 value to shift plus a shift amount.
4885SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
4886 SelectionDAG &DAG) const {
4887   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4888 EVT VT = Op.getValueType();
4889 unsigned VTBits = VT.getSizeInBits();
4890 SDLoc dl(Op);
4891 SDValue ShOpLo = Op.getOperand(0);
4892 SDValue ShOpHi = Op.getOperand(1);
4893 SDValue ShAmt = Op.getOperand(2);
4894 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4895
4896   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4897
4898 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4899 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4900 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4901
4902 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
4903 // is "undef". We wanted 0, so CSEL it directly.
4904 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4905 ISD::SETEQ, dl, DAG);
4906 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4907 HiBitsForLo =
4908 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4909 HiBitsForLo, CCVal, Cmp);
4910
4911 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4912 DAG.getConstant(VTBits, dl, MVT::i64));
4913
4914 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4915 SDValue LoForNormalShift =
4916 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
4917
4918 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4919 dl, DAG);
4920 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4921 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4922 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4923 LoForNormalShift, CCVal, Cmp);
4924
4925 // AArch64 shifts larger than the register width are wrapped rather than
4926 // clamped, so we can't just emit "hi >> x".
4927 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4928 SDValue HiForBigShift =
4929 Opc == ISD::SRA
4930 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4931 DAG.getConstant(VTBits - 1, dl, MVT::i64))
4932 : DAG.getConstant(0, dl, VT);
4933 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4934 HiForNormalShift, CCVal, Cmp);
4935
4936 SDValue Ops[2] = { Lo, Hi };
4937 return DAG.getMergeValues(Ops, dl);
4938}
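// Editorial illustration only, not part of the original file: a plain C++
// sketch of what the SRL_PARTS lowering above computes for a 128-bit value
// split into 64-bit halves (assumes <cstdint> and <utility> are available
// through the existing includes). The CSEL nodes correspond to the explicit
// guards against the undefined shift-by-64 and the >= 64 "big shift" case.
static inline std::pair<uint64_t, uint64_t>
srlParts128Sketch(uint64_t LoIn, uint64_t HiIn, unsigned Amt) {
  // ShAmt == 0 would make "HiIn << 64" undefined, so force 0 instead (CSEL).
  uint64_t HiBitsForLo = (Amt == 0) ? 0 : HiIn << (64 - Amt);
  uint64_t Lo = (Amt >= 64) ? HiIn >> (Amt - 64) : (LoIn >> Amt) | HiBitsForLo;
  uint64_t Hi = (Amt >= 64) ? 0 : HiIn >> Amt;
  // SRA_PARTS is identical except the Hi shifts are arithmetic and the
  // big-shift Hi result is HiIn >> 63 (all sign bits) rather than 0.
  return std::make_pair(Lo, Hi);
}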
4939
4940/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4941/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
4942SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
4943 SelectionDAG &DAG) const {
4944   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4945 EVT VT = Op.getValueType();
4946 unsigned VTBits = VT.getSizeInBits();
4947 SDLoc dl(Op);
4948 SDValue ShOpLo = Op.getOperand(0);
4949 SDValue ShOpHi = Op.getOperand(1);
4950 SDValue ShAmt = Op.getOperand(2);
4951
4952   assert(Op.getOpcode() == ISD::SHL_PARTS);
4953 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4954 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4955 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4956
4957 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
4958 // is "undef". We wanted 0, so CSEL it directly.
4959 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4960 ISD::SETEQ, dl, DAG);
4961 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4962 LoBitsForHi =
4963 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4964 LoBitsForHi, CCVal, Cmp);
4965
4966 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4967 DAG.getConstant(VTBits, dl, MVT::i64));
4968 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4969 SDValue HiForNormalShift =
4970 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
4971
4972 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4973
4974 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4975 dl, DAG);
4976 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4977 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4978 HiForNormalShift, CCVal, Cmp);
4979
4980   // AArch64 shifts larger than the register size are wrapped rather than
4981   // clamped, so we can't just emit "lo << a" if a is too big.
4982 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
4983 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4984 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4985 LoForNormalShift, CCVal, Cmp);
4986
4987 SDValue Ops[2] = { Lo, Hi };
4988 return DAG.getMergeValues(Ops, dl);
4989}
4990
4991bool AArch64TargetLowering::isOffsetFoldingLegal(
4992 const GlobalAddressSDNode *GA) const {
4993 // FIXME: Only ELF can represent the full range of possible addends here, as
4994 // the format stores the addend in a 64-bit field. With Mach-O the equivalent
4995 // field is 24 bits, and with COFF it is 21 bits. To make this work with the
4996 // other object formats we will need to arrange to prevent the addend from
4997 // going out of bounds.
4998 if (!getTargetMachine().getTargetTriple().isOSBinFormatELF())
4999 return false;
5000 return Subtarget->ClassifyGlobalReference(
5001 GA->getGlobal(), getTargetMachine()) == AArch64II::MO_NO_FLAG;
5002}
5003
5004bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5005 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
5006 // FIXME: We should be able to handle f128 as well with a clever lowering.
5007 if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
5008 (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
5009     DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
5010 return true;
5011 }
5012
5013 StringRef FPType;
5014 bool IsLegal = false;
5015 SmallString<128> ImmStrVal;
5016 Imm.toString(ImmStrVal);
5017
5018 if (VT == MVT::f64) {
5019 FPType = "f64";
5020 IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
5021 } else if (VT == MVT::f32) {
5022 FPType = "f32";
5023 IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
5024 } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
5025 FPType = "f16";
5026 IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
5027 }
5028
5029 if (IsLegal) {
5030     DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n");
5031 return true;
5032 }
5033
5034 if (!FPType.empty())
5035     DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n");
5036 else
5037     DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n");
5038
5039 return false;
5040}
5041
5042//===----------------------------------------------------------------------===//
5043// AArch64 Optimization Hooks
5044//===----------------------------------------------------------------------===//
5045
5046static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
5047 SDValue Operand, SelectionDAG &DAG,
5048 int &ExtraSteps) {
5049 EVT VT = Operand.getValueType();
5050 if (ST->hasNEON() &&
5051 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
5052 VT == MVT::f32 || VT == MVT::v1f32 ||
5053 VT == MVT::v2f32 || VT == MVT::v4f32)) {
5054 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
5055 // For the reciprocal estimates, convergence is quadratic, so the number
5056 // of digits is doubled after each iteration. In ARMv8, the accuracy of
5057 // the initial estimate is 2^-8. Thus the number of extra steps to refine
5058 // the result for float (23 mantissa bits) is 2 and for double (52
5059 // mantissa bits) is 3.
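      // (Editorial arithmetic, for illustration: each step squares the error,
      //  so after k steps it is roughly 2^-(8 * 2^k); 8 * 2^2 = 32 covers the
      //  23 mantissa bits of float and 8 * 2^3 = 64 covers the 52 mantissa
      //  bits of double, hence the 2 and 3 below.)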
5060 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
5061
5062 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
5063 }
5064
5065 return SDValue();
5066}
5067
5068SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
5069 SelectionDAG &DAG, int Enabled,
5070 int &ExtraSteps,
5071 bool &UseOneConst,
5072 bool Reciprocal) const {
5073 if (Enabled == ReciprocalEstimate::Enabled ||
5074 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5075 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5076 DAG, ExtraSteps)) {
5077 SDLoc DL(Operand);
5078 EVT VT = Operand.getValueType();
5079
5080 SDNodeFlags Flags;
5081 Flags.setUnsafeAlgebra(true);
5082
5083 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
5084 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
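      // (Editorial note, for illustration: in the loop below, Step is first
      //  E * E, then FRSQRTS gives 0.5 * (3 - X * E^2), and the final FMUL
      //  forms E * 0.5 * (3 - X * E^2), i.e. one full Newton-Raphson step.)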
5085 for (int i = ExtraSteps; i > 0; --i) {
5086 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
5087 Flags);
5088 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
5089 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5090 }
5091 if (!Reciprocal) {
5092 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5093 VT);
5094 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5095 SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5096
5097 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5098 // Correct the result if the operand is 0.0.
5099 Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5100 VT, Eq, Operand, Estimate);
5101 }
5102
5103 ExtraSteps = 0;
5104 return Estimate;
5105 }
5106
5107 return SDValue();
5108}
5109
5110SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5111 SelectionDAG &DAG, int Enabled,
5112 int &ExtraSteps) const {
5113 if (Enabled == ReciprocalEstimate::Enabled)
5114 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5115 DAG, ExtraSteps)) {
5116 SDLoc DL(Operand);
5117 EVT VT = Operand.getValueType();
5118
5119 SDNodeFlags Flags;
5120 Flags.setUnsafeAlgebra(true);
5121
5122 // Newton reciprocal iteration: E * (2 - X * E)
5123 // AArch64 reciprocal iteration instruction: (2 - M * N)
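      // (Editorial note, for illustration: FRECPS computes (2 - X * E), and
      //  the following FMUL forms E * (2 - X * E), one Newton-Raphson step
      //  towards 1 / X.)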
5124 for (int i = ExtraSteps; i > 0; --i) {
5125 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5126 Estimate, Flags);
5127 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5128 }
5129
5130 ExtraSteps = 0;
5131 return Estimate;
5132 }
5133
5134 return SDValue();
5135}
5136
5137//===----------------------------------------------------------------------===//
5138// AArch64 Inline Assembly Support
5139//===----------------------------------------------------------------------===//
5140
5141// Table of Constraints
5142// TODO: This is the current set of constraints supported by ARM for the
5143// compiler; not all of them may make sense, e.g., S may be difficult to support.
5144//
5145// r - A general register
5146// w - An FP/SIMD register of some size in the range v0-v31
5147// x - An FP/SIMD register of some size in the range v0-v15
5148// I - Constant that can be used with an ADD instruction
5149// J - Constant that can be used with a SUB instruction
5150// K - Constant that can be used with a 32-bit logical instruction
5151// L - Constant that can be used with a 64-bit logical instruction
5152// M - Constant that can be used as a 32-bit MOV immediate
5153// N - Constant that can be used as a 64-bit MOV immediate
5154// Q - A memory reference with base register and no offset
5155// S - A symbolic address
5156// Y - Floating point constant zero
5157// Z - Integer constant zero
5158//
5159// Note that general register operands will be output using their 64-bit x
5160// register name, whatever the size of the variable, unless the asm operand
5161// is prefixed by the %w modifier. Floating-point and SIMD register operands
5162// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5163// %q modifier.
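// Editorial usage sketch (not part of the original file): a C caller might
// write something like
//   int res;
//   asm("add %w0, %w1, %2" : "=r"(res) : "r"(lhs), "I"(4095));
// where "r" selects a general register (printed as w0/w1 because of the %w
// modifier) and "I" only accepts an ADD-style immediate, as validated in
// LowerAsmOperandForConstraint below.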
5164const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5165 // At this point, we have to lower this constraint to something else, so we
5166 // lower it to an "r" or "w". However, by doing this we will force the result
5167 // to be in register, while the X constraint is much more permissive.
5168 //
5169 // Although we are correct (we are free to emit anything, without
5170 // constraints), we might break use cases that would expect us to be more
5171 // efficient and emit something else.
5172 if (!Subtarget->hasFPARMv8())
5173 return "r";
5174
5175 if (ConstraintVT.isFloatingPoint())
5176 return "w";
5177
5178 if (ConstraintVT.isVector() &&
5179 (ConstraintVT.getSizeInBits() == 64 ||
5180 ConstraintVT.getSizeInBits() == 128))
5181 return "w";
5182
5183 return "r";
5184}
5185
5186/// getConstraintType - Given a constraint letter, return the type of
5187/// constraint it is for this target.
5188AArch64TargetLowering::ConstraintType
5189AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5190 if (Constraint.size() == 1) {
5191 switch (Constraint[0]) {
5192 default:
5193 break;
5194 case 'z':
5195 return C_Other;
5196 case 'x':
5197 case 'w':
5198 return C_RegisterClass;
5199 // An address with a single base register. Due to the way we
5200 // currently handle addresses it is the same as 'r'.
5201 case 'Q':
5202 return C_Memory;
5203 }
5204 }
5205 return TargetLowering::getConstraintType(Constraint);
5206}
5207
5208/// Examine constraint type and operand type and determine a weight value.
5209/// This object must already have been set up with the operand type
5210/// and the current alternative constraint selected.
5211TargetLowering::ConstraintWeight
5212AArch64TargetLowering::getSingleConstraintMatchWeight(
5213 AsmOperandInfo &info, const char *constraint) const {
5214 ConstraintWeight weight = CW_Invalid;
5215 Value *CallOperandVal = info.CallOperandVal;
5216 // If we don't have a value, we can't do a match,
5217 // but allow it at the lowest weight.
5218 if (!CallOperandVal)
5219 return CW_Default;
5220 Type *type = CallOperandVal->getType();
5221 // Look at the constraint type.
5222 switch (*constraint) {
5223 default:
5224 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5225 break;
5226 case 'x':
5227 case 'w':
5228 if (type->isFloatingPointTy() || type->isVectorTy())
5229 weight = CW_Register;
5230 break;
5231 case 'z':
5232 weight = CW_Constant;
5233 break;
5234 }
5235 return weight;
5236}
5237
5238std::pair<unsigned, const TargetRegisterClass *>
5239AArch64TargetLowering::getRegForInlineAsmConstraint(
5240 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5241 if (Constraint.size() == 1) {
5242 switch (Constraint[0]) {
5243 case 'r':
5244 if (VT.getSizeInBits() == 64)
5245 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5246 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5247 case 'w':
5248 if (VT.getSizeInBits() == 16)
5249 return std::make_pair(0U, &AArch64::FPR16RegClass);
5250 if (VT.getSizeInBits() == 32)
5251 return std::make_pair(0U, &AArch64::FPR32RegClass);
5252 if (VT.getSizeInBits() == 64)
5253 return std::make_pair(0U, &AArch64::FPR64RegClass);
5254 if (VT.getSizeInBits() == 128)
5255 return std::make_pair(0U, &AArch64::FPR128RegClass);
5256 break;
5257 // The instructions that this constraint is designed for can
5258 // only take 128-bit registers so just use that regclass.
5259 case 'x':
5260 if (VT.getSizeInBits() == 128)
5261 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5262 break;
5263 }
5264 }
5265 if (StringRef("{cc}").equals_lower(Constraint))
5266 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5267
5268 // Use the default implementation in TargetLowering to convert the register
5269 // constraint into a member of a register class.
5270 std::pair<unsigned, const TargetRegisterClass *> Res;
5271 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5272
5273 // Not found as a standard register?
5274 if (!Res.second) {
5275 unsigned Size = Constraint.size();
5276 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5277 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5278 int RegNo;
5279 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5280 if (!Failed && RegNo >= 0 && RegNo <= 31) {
5281 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5282 // By default we'll emit v0-v31 for this unless there's a modifier where
5283 // we'll emit the correct register as well.
5284 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5285 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5286 Res.second = &AArch64::FPR64RegClass;
5287 } else {
5288 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5289 Res.second = &AArch64::FPR128RegClass;
5290 }
5291 }
5292 }
5293 }
5294
5295 return Res;
5296}
5297
5298/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5299/// vector. If it is invalid, don't add anything to Ops.
5300void AArch64TargetLowering::LowerAsmOperandForConstraint(
5301 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5302 SelectionDAG &DAG) const {
5303 SDValue Result;
5304
5305 // Currently only support length 1 constraints.
5306 if (Constraint.length() != 1)
5307 return;
5308
5309 char ConstraintLetter = Constraint[0];
5310 switch (ConstraintLetter) {
5311 default:
5312 break;
5313
5314 // This set of constraints deal with valid constants for various instructions.
5315 // Validate and return a target constant for them if we can.
5316 case 'z': {
5317 // 'z' maps to xzr or wzr so it needs an input of 0.
5318 if (!isNullConstant(Op))
5319 return;
5320
5321 if (Op.getValueType() == MVT::i64)
5322 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5323 else
5324 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5325 break;
5326 }
5327
5328 case 'I':
5329 case 'J':
5330 case 'K':
5331 case 'L':
5332 case 'M':
5333 case 'N':
5334 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5335 if (!C)
5336 return;
5337
5338 // Grab the value and do some validation.
5339 uint64_t CVal = C->getZExtValue();
5340 switch (ConstraintLetter) {
5341 // The I constraint applies only to simple ADD or SUB immediate operands:
5342 // i.e. 0 to 4095 with optional shift by 12
5343 // The J constraint applies only to ADD or SUB immediates that would be
5344 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5345 // instruction [or vice versa], in other words -1 to -4095 with optional
5346 // left shift by 12.
5347 case 'I':
5348 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5349 break;
5350 return;
5351 case 'J': {
5352 uint64_t NVal = -C->getSExtValue();
5353 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5354 CVal = C->getSExtValue();
5355 break;
5356 }
5357 return;
5358 }
5359 // The K and L constraints apply *only* to logical immediates, including
5360 // what used to be the MOVI alias for ORR (though the MOVI alias has now
5361 // been removed and MOV should be used). So these constraints have to
5362 // distinguish between bit patterns that are valid 32-bit or 64-bit
5363 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5364 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5365 // versa.
5366 case 'K':
5367 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5368 break;
5369 return;
5370 case 'L':
5371 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5372 break;
5373 return;
5374 // The M and N constraints are a superset of K and L respectively, for use
5375 // with the MOV (immediate) alias. As well as the logical immediates they
5376 // also match 32 or 64-bit immediates that can be loaded either using a
5377// *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5378 // (M) or 64-bit 0x1234000000000000 (N) etc.
5379// As a note, some of this code is liberally stolen from the asm parser.
5380 case 'M': {
5381 if (!isUInt<32>(CVal))
5382 return;
5383 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5384 break;
5385 if ((CVal & 0xFFFF) == CVal)
5386 break;
5387 if ((CVal & 0xFFFF0000ULL) == CVal)
5388 break;
5389 uint64_t NCVal = ~(uint32_t)CVal;
5390 if ((NCVal & 0xFFFFULL) == NCVal)
5391 break;
5392 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5393 break;
5394 return;
5395 }
5396 case 'N': {
5397 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5398 break;
5399 if ((CVal & 0xFFFFULL) == CVal)
5400 break;
5401 if ((CVal & 0xFFFF0000ULL) == CVal)
5402 break;
5403 if ((CVal & 0xFFFF00000000ULL) == CVal)
5404 break;
5405 if ((CVal & 0xFFFF000000000000ULL) == CVal)
5406 break;
5407 uint64_t NCVal = ~CVal;
5408 if ((NCVal & 0xFFFFULL) == NCVal)
5409 break;
5410 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5411 break;
5412 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5413 break;
5414 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5415 break;
5416 return;
5417 }
5418 default:
5419 return;
5420 }
5421
5422 // All assembler immediates are 64-bit integers.
5423 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5424 break;
5425 }
5426
5427 if (Result.getNode()) {
5428 Ops.push_back(Result);
5429 return;
5430 }
5431
5432 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5433}
5434
5435//===----------------------------------------------------------------------===//
5436// AArch64 Advanced SIMD Support
5437//===----------------------------------------------------------------------===//
5438
5439/// WidenVector - Given a value in the V64 register class, produce the
5440/// equivalent value in the V128 register class.
5441static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5442 EVT VT = V64Reg.getValueType();
5443 unsigned NarrowSize = VT.getVectorNumElements();
5444 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5445 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5446 SDLoc DL(V64Reg);
5447
5448 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5449 V64Reg, DAG.getConstant(0, DL, MVT::i32));
5450}
5451
5452/// getExtFactor - Determine the adjustment factor for the position when
5453/// generating an "extract from vector registers" instruction.
5454static unsigned getExtFactor(SDValue &V) {
5455 EVT EltType = V.getValueType().getVectorElementType();
5456 return EltType.getSizeInBits() / 8;
5457}
5458
5459/// NarrowVector - Given a value in the V128 register class, produce the
5460/// equivalent value in the V64 register class.
5461static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5462 EVT VT = V128Reg.getValueType();
5463 unsigned WideSize = VT.getVectorNumElements();
5464 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5465 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5466 SDLoc DL(V128Reg);
5467
5468 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5469}
5470
5471// Gather data to see if the operation can be modelled as a
5472// shuffle in combination with VEXTs.
5473SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5474 SelectionDAG &DAG) const {
5475   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5476   DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5477 SDLoc dl(Op);
5478 EVT VT = Op.getValueType();
5479 unsigned NumElts = VT.getVectorNumElements();
5480
5481 struct ShuffleSourceInfo {
5482 SDValue Vec;
5483 unsigned MinElt;
5484 unsigned MaxElt;
5485
5486 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5487 // be compatible with the shuffle we intend to construct. As a result
5488 // ShuffleVec will be some sliding window into the original Vec.
5489 SDValue ShuffleVec;
5490
5491     // Code should guarantee that element i in Vec starts at element
5492     // "WindowBase + i * WindowScale" in ShuffleVec.
5493 int WindowBase;
5494 int WindowScale;
5495
5496 ShuffleSourceInfo(SDValue Vec)
5497 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5498 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5499
5500 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5501 };
5502
5503 // First gather all vectors used as an immediate source for this BUILD_VECTOR
5504 // node.
5505 SmallVector<ShuffleSourceInfo, 2> Sources;
5506 for (unsigned i = 0; i < NumElts; ++i) {
5507 SDValue V = Op.getOperand(i);
5508 if (V.isUndef())
5509 continue;
5510 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5511 !isa<ConstantSDNode>(V.getOperand(1))) {
5512       DEBUG(dbgs() << "Reshuffle failed: "
5513                       "a shuffle can only come from building a vector from "
5514                       "various elements of other vectors, provided their "
5515                       "indices are constant\n");
5516 return SDValue();
5517 }
5518
5519 // Add this element source to the list if it's not already there.
5520 SDValue SourceVec = V.getOperand(0);
5521 auto Source = find(Sources, SourceVec);
5522 if (Source == Sources.end())
5523 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5524
5525 // Update the minimum and maximum lane number seen.
5526 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5527 Source->MinElt = std::min(Source->MinElt, EltNo);
5528 Source->MaxElt = std::max(Source->MaxElt, EltNo);
5529 }
5530
5531 if (Sources.size() > 2) {
5532     DEBUG(dbgs() << "Reshuffle failed: currently only do something sane when at "
5533                     "most two source vectors are involved\n");
5534 return SDValue();
5535 }
5536
5537 // Find out the smallest element size among result and two sources, and use
5538 // it as element size to build the shuffle_vector.
5539 EVT SmallestEltTy = VT.getVectorElementType();
5540 for (auto &Source : Sources) {
5541 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5542 if (SrcEltTy.bitsLT(SmallestEltTy)) {
5543 SmallestEltTy = SrcEltTy;
5544 }
5545 }
5546 unsigned ResMultiplier =
5547 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5548 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5549 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5550
5551 // If the source vector is too wide or too narrow, we may nevertheless be able
5552 // to construct a compatible shuffle either by concatenating it with UNDEF or
5553 // extracting a suitable range of elements.
5554 for (auto &Src : Sources) {
5555 EVT SrcVT = Src.ShuffleVec.getValueType();
5556
5557 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
5558 continue;
5559
5560 // This stage of the search produces a source with the same element type as
5561 // the original, but with a total width matching the BUILD_VECTOR output.
5562 EVT EltVT = SrcVT.getVectorElementType();
5563 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
5564 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
5565
5566 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
5567       assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
5568 // We can pad out the smaller vector for free, so if it's part of a
5569 // shuffle...
5570 Src.ShuffleVec =
5571 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
5572 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
5573 continue;
5574 }
5575
5576     assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
5577
5578 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
5579       DEBUG(dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
5580 return SDValue();
5581 }
5582
5583 if (Src.MinElt >= NumSrcElts) {
5584 // The extraction can just take the second half
5585 Src.ShuffleVec =
5586 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5587 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5588 Src.WindowBase = -NumSrcElts;
5589 } else if (Src.MaxElt < NumSrcElts) {
5590 // The extraction can just take the first half
5591 Src.ShuffleVec =
5592 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5593 DAG.getConstant(0, dl, MVT::i64));
5594 } else {
5595 // An actual VEXT is needed
5596 SDValue VEXTSrc1 =
5597 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5598 DAG.getConstant(0, dl, MVT::i64));
5599 SDValue VEXTSrc2 =
5600 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5601 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5602 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
5603
5604 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
5605 VEXTSrc2,
5606 DAG.getConstant(Imm, dl, MVT::i32));
5607 Src.WindowBase = -Src.MinElt;
5608 }
5609 }
5610
5611 // Another possible incompatibility occurs from the vector element types. We
5612 // can fix this by bitcasting the source vectors to the same type we intend
5613 // for the shuffle.
5614 for (auto &Src : Sources) {
5615 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
5616 if (SrcEltTy == SmallestEltTy)
5617 continue;
5618     assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
5619 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
5620 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
5621 Src.WindowBase *= Src.WindowScale;
5622 }
5623
5624 // Final sanity check before we try to actually produce a shuffle.
5625   DEBUG(
5626     for (auto Src : Sources)
5627       assert(Src.ShuffleVec.getValueType() == ShuffleVT);
5628   );
5629
5630 // The stars all align, our next step is to produce the mask for the shuffle.
5631 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
5632 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
5633 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
5634 SDValue Entry = Op.getOperand(i);
5635 if (Entry.isUndef())
5636 continue;
5637
5638 auto Src = find(Sources, Entry.getOperand(0));
5639 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
5640
5641 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
5642 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
5643 // segment.
5644 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
5645 int BitsDefined =
5646 std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
5647 int LanesDefined = BitsDefined / BitsPerShuffleLane;
5648
5649 // This source is expected to fill ResMultiplier lanes of the final shuffle,
5650 // starting at the appropriate offset.
5651 int *LaneMask = &Mask[i * ResMultiplier];
5652
5653 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
5654 ExtractBase += NumElts * (Src - Sources.begin());
5655 for (int j = 0; j < LanesDefined; ++j)
5656 LaneMask[j] = ExtractBase + j;
5657 }
5658
5659 // Final check before we try to produce nonsense...
5660 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
5661 DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-lower")) { dbgs() << "Reshuffle failed: illegal shuffle mask\n"
; } } while (false)
;
5662 return SDValue();
5663 }
5664
5665 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
5666 for (unsigned i = 0; i < Sources.size(); ++i)
5667 ShuffleOps[i] = Sources[i].ShuffleVec;
5668
5669 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
5670 ShuffleOps[1], Mask);
5671 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
5672
5673   DEBUG(
5674     dbgs() << "Reshuffle, creating node: ";
5675     Shuffle.dump();
5676     dbgs() << "Reshuffle, creating node: ";
5677     V.dump();
5678   );
5679
5680 return V;
5681}
5682
5683// check if an EXT instruction can handle the shuffle mask when the
5684// vector sources of the shuffle are the same.
5685static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5686 unsigned NumElts = VT.getVectorNumElements();
5687
5688 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5689 if (M[0] < 0)
5690 return false;
5691
5692 Imm = M[0];
5693
5694 // If this is a VEXT shuffle, the immediate value is the index of the first
5695 // element. The other shuffle indices must be the successive elements after
5696 // the first one.
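  // (Editorial example: for a single-source v8i8 shuffle, the mask
  //  <3, 4, 5, 6, 7, 0, 1, 2> is accepted with Imm = 3; the expected index is
  //  allowed to wrap back around to 0.)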
5697 unsigned ExpectedElt = Imm;
5698 for (unsigned i = 1; i < NumElts; ++i) {
5699 // Increment the expected index. If it wraps around, just follow it
5700 // back to index zero and keep going.
5701 ++ExpectedElt;
5702 if (ExpectedElt == NumElts)
5703 ExpectedElt = 0;
5704
5705 if (M[i] < 0)
5706 continue; // ignore UNDEF indices
5707 if (ExpectedElt != static_cast<unsigned>(M[i]))
5708 return false;
5709 }
5710
5711 return true;
5712}
5713
5714// check if an EXT instruction can handle the shuffle mask when the
5715// vector sources of the shuffle are different.
5716static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
5717 unsigned &Imm) {
5718 // Look for the first non-undef element.
5719 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
5720
5721   // Benefit from APInt to handle overflow when calculating the expected element.
5722 unsigned NumElts = VT.getVectorNumElements();
5723 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
5724 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
5725 // The following shuffle indices must be the successive elements after the
5726 // first real element.
5727 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
5728 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
5729 if (FirstWrongElt != M.end())
5730 return false;
5731
5732 // The index of an EXT is the first element if it is not UNDEF.
5733 // Watch out for the beginning UNDEFs. The EXT index should be the expected
5734 // value of the first element. E.g.
5735 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
5736 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
5737 // ExpectedElt is the last mask index plus 1.
5738 Imm = ExpectedElt.getZExtValue();
5739
5740   // There are two different cases that require reversing the input vectors.
5741 // For example, for vector <4 x i32> we have the following cases,
5742 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
5743 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
5744 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
5745 // to reverse two input vectors.
5746 if (Imm < NumElts)
5747 ReverseEXT = true;
5748 else
5749 Imm -= NumElts;
5750
5751 return true;
5752}
5753
5754/// isREVMask - Check if a vector shuffle corresponds to a REV
5755/// instruction with the specified blocksize. (The order of the elements
5756/// within each block of the vector is reversed.)
5757static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5758   assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
5759          "Only possible block sizes for REV are: 16, 32, 64");
5760
5761 unsigned EltSz = VT.getScalarSizeInBits();
5762 if (EltSz == 64)
5763 return false;
5764
5765 unsigned NumElts = VT.getVectorNumElements();
5766 unsigned BlockElts = M[0] + 1;
5767 // If the first shuffle index is UNDEF, be optimistic.
5768 if (M[0] < 0)
5769 BlockElts = BlockSize / EltSz;
5770
5771 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5772 return false;
5773
5774 for (unsigned i = 0; i < NumElts; ++i) {
5775 if (M[i] < 0)
5776 continue; // ignore UNDEF indices
5777 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
5778 return false;
5779 }
5780
5781 return true;
5782}
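// Editorial example (not from the original source): for v8i16 with
// BlockSize == 32, the accepted REV32-style mask is <1, 0, 3, 2, 5, 4, 7, 6>,
// i.e. the two 16-bit elements inside each 32-bit block are swapped.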
5783
5784static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5785 unsigned NumElts = VT.getVectorNumElements();
5786 WhichResult = (M[0] == 0 ? 0 : 1);
5787 unsigned Idx = WhichResult * NumElts / 2;
5788 for (unsigned i = 0; i != NumElts; i += 2) {
5789 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5790 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
5791 return false;
5792 Idx += 1;
5793 }
5794
5795 return true;
5796}
5797
5798static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5799 unsigned NumElts = VT.getVectorNumElements();
5800 WhichResult = (M[0] == 0 ? 0 : 1);
5801 for (unsigned i = 0; i != NumElts; ++i) {
5802 if (M[i] < 0)
5803 continue; // ignore UNDEF indices
5804 if ((unsigned)M[i] != 2 * i + WhichResult)
5805 return false;
5806 }
5807
5808 return true;
5809}
5810
5811static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5812 unsigned NumElts = VT.getVectorNumElements();
5813 WhichResult = (M[0] == 0 ? 0 : 1);
5814 for (unsigned i = 0; i < NumElts; i += 2) {
5815 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5816 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
5817 return false;
5818 }
5819 return true;
5820}
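// Editorial examples (not from the original source), all for v4i32:
//   isZIPMask accepts <0, 4, 1, 5> (WhichResult == 0) and <2, 6, 3, 7>,
//   isUZPMask accepts <0, 2, 4, 6> and <1, 3, 5, 7>,
//   isTRNMask accepts <0, 4, 2, 6> and <1, 5, 3, 7>,
// matching the ZIP1/ZIP2, UZP1/UZP2 and TRN1/TRN2 instructions respectively.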
5821
5822/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
5823/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5824/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5825static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5826 unsigned NumElts = VT.getVectorNumElements();
5827 WhichResult = (M[0] == 0 ? 0 : 1);
5828 unsigned Idx = WhichResult * NumElts / 2;
5829 for (unsigned i = 0; i != NumElts; i += 2) {
5830 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5831 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
5832 return false;
5833 Idx += 1;
5834 }
5835
5836 return true;
5837}
5838
5839/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
5840/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5841/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5842static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5843 unsigned Half = VT.getVectorNumElements() / 2;
5844 WhichResult = (M[0] == 0 ? 0 : 1);
5845 for (unsigned j = 0; j != 2; ++j) {
5846 unsigned Idx = WhichResult;
5847 for (unsigned i = 0; i != Half; ++i) {
5848 int MIdx = M[i + j * Half];
5849 if (MIdx >= 0 && (unsigned)MIdx != Idx)
5850 return false;
5851 Idx += 2;
5852 }
5853 }
5854
5855 return true;
5856}
5857
5858/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
5859/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5860/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5861static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5862 unsigned NumElts = VT.getVectorNumElements();
5863 WhichResult = (M[0] == 0 ? 0 : 1);
5864 for (unsigned i = 0; i < NumElts; i += 2) {
5865 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5866 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
5867 return false;
5868 }
5869 return true;
5870}
5871
5872static bool isINSMask(ArrayRef<int> M, int NumInputElements,
5873 bool &DstIsLeft, int &Anomaly) {
5874 if (M.size() != static_cast<size_t>(NumInputElements))
5875 return false;
5876
5877 int NumLHSMatch = 0, NumRHSMatch = 0;
5878 int LastLHSMismatch = -1, LastRHSMismatch = -1;
5879
5880 for (int i = 0; i < NumInputElements; ++i) {
5881 if (M[i] == -1) {
5882 ++NumLHSMatch;
5883 ++NumRHSMatch;
5884 continue;
5885 }
5886
5887 if (M[i] == i)
5888 ++NumLHSMatch;
5889 else
5890 LastLHSMismatch = i;
5891
5892 if (M[i] == i + NumInputElements)
5893 ++NumRHSMatch;
5894 else
5895 LastRHSMismatch = i;
5896 }
5897
5898 if (NumLHSMatch == NumInputElements - 1) {
5899 DstIsLeft = true;
5900 Anomaly = LastLHSMismatch;
5901 return true;
5902 } else if (NumRHSMatch == NumInputElements - 1) {
5903 DstIsLeft = false;
5904 Anomaly = LastRHSMismatch;
5905 return true;
5906 }
5907
5908 return false;
5909}
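// Editorial example (not from the original source): with 4 input elements,
// the mask <0, 5, 2, 3> matches the LHS everywhere except lane 1, so this
// returns true with DstIsLeft = true and Anomaly = 1, i.e. an INS of RHS
// element 1 into lane 1 of the LHS.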
5910
5911static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
5912 if (VT.getSizeInBits() != 128)
5913 return false;
5914
5915 unsigned NumElts = VT.getVectorNumElements();
5916
5917 for (int I = 0, E = NumElts / 2; I != E; I++) {
5918 if (Mask[I] != I)
5919 return false;
5920 }
5921
5922 int Offset = NumElts / 2;
5923 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
5924 if (Mask[I] != I + SplitLHS * Offset)
5925 return false;
5926 }
5927
5928 return true;
5929}
5930
5931static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
5932 SDLoc DL(Op);
5933 EVT VT = Op.getValueType();