Bug Summary

File: lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 8834, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
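
The rule behind the warning: for an unsigned long long (64 bits wide), the shift amount must be strictly less than 64, so a shift by 64 or more is undefined behaviour rather than producing zero. The code at line 8834 is not part of this excerpt, so the snippet below is only a minimal, hypothetical sketch of the guarded pattern that avoids the undefined shift; the helper name lowBitMask and its use are illustrative assumptions, not the code under analysis.

#include <cassert>
#include <cstdint>

// Hypothetical helper: build a mask of the low 'Bits' bits without ever
// evaluating 1ULL << 64, which would be undefined behaviour.
static uint64_t lowBitMask(unsigned Bits) {
  assert(Bits <= 64 && "requested mask is wider than 64 bits");
  if (Bits == 64)
    return ~0ULL;            // handle the full-width case explicitly
  return (1ULL << Bits) - 1; // safe: Bits is now in [0, 63]
}

LLVM's llvm/Support/MathExtras.h, which this file already includes, provides helpers such as maskTrailingOnes<uint64_t>(N) that encapsulate the same full-width guard and can serve the same purpose.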

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn350071/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn350071/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-8~svn350071=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-12-27-042839-1215-1 -x c++ /build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AArch64TargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64ISelLowering.h"
15#include "AArch64CallingConvention.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGNodes.h"
44#include "llvm/CodeGen/TargetCallingConv.h"
45#include "llvm/CodeGen/TargetInstrInfo.h"
46#include "llvm/CodeGen/ValueTypes.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/GetElementPtrTypeIterator.h"
54#include "llvm/IR/GlobalValue.h"
55#include "llvm/IR/IRBuilder.h"
56#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Instructions.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/OperandTraits.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/Value.h"
64#include "llvm/MC/MCRegisterInfo.h"
65#include "llvm/Support/Casting.h"
66#include "llvm/Support/CodeGen.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/Debug.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Support/KnownBits.h"
72#include "llvm/Support/MachineValueType.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Target/TargetMachine.h"
76#include "llvm/Target/TargetOptions.h"
77#include <algorithm>
78#include <bitset>
79#include <cassert>
80#include <cctype>
81#include <cstdint>
82#include <cstdlib>
83#include <iterator>
84#include <limits>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "aarch64-lower"
92
93STATISTIC(NumTailCalls, "Number of tail calls");
94STATISTIC(NumShiftInserts, "Number of vector shift inserts");
95STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96
97static cl::opt<bool>
98EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99 cl::desc("Allow AArch64 SLI/SRI formation"),
100 cl::init(false));
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116/// Value type used for condition codes.
117static const MVT MVT_CC = MVT::i32;
118
119AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
120 const AArch64Subtarget &STI)
121 : TargetLowering(TM), Subtarget(&STI) {
122 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123 // we have to make something up. Arbitrarily, choose ZeroOrOne.
124 setBooleanContents(ZeroOrOneBooleanContent);
125 // When comparing vectors the result sets the different elements in the
126 // vector to all-one or all-zero.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // Set up the register classes.
130 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132
133 if (Subtarget->hasFPARMv8()) {
134 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138 }
139
140 if (Subtarget->hasNEON()) {
141 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143 // Someone set us up the NEON.
144 addDRTypeForNEON(MVT::v2f32);
145 addDRTypeForNEON(MVT::v8i8);
146 addDRTypeForNEON(MVT::v4i16);
147 addDRTypeForNEON(MVT::v2i32);
148 addDRTypeForNEON(MVT::v1i64);
149 addDRTypeForNEON(MVT::v1f64);
150 addDRTypeForNEON(MVT::v4f16);
151
152 addQRTypeForNEON(MVT::v4f32);
153 addQRTypeForNEON(MVT::v2f64);
154 addQRTypeForNEON(MVT::v16i8);
155 addQRTypeForNEON(MVT::v8i16);
156 addQRTypeForNEON(MVT::v4i32);
157 addQRTypeForNEON(MVT::v2i64);
158 addQRTypeForNEON(MVT::v8f16);
159 }
160
161 // Compute derived properties from the register classes
162 computeRegisterProperties(Subtarget->getRegisterInfo());
163
164 // Provide all sorts of operation actions
165 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
166 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
167 setOperationAction(ISD::SETCC, MVT::i32, Custom);
168 setOperationAction(ISD::SETCC, MVT::i64, Custom);
169 setOperationAction(ISD::SETCC, MVT::f16, Custom);
170 setOperationAction(ISD::SETCC, MVT::f32, Custom);
171 setOperationAction(ISD::SETCC, MVT::f64, Custom);
172 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
173 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
174 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
175 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
176 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
177 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
178 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
179 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
180 setOperationAction(ISD::SELECT, MVT::i32, Custom);
181 setOperationAction(ISD::SELECT, MVT::i64, Custom);
182 setOperationAction(ISD::SELECT, MVT::f16, Custom);
183 setOperationAction(ISD::SELECT, MVT::f32, Custom);
184 setOperationAction(ISD::SELECT, MVT::f64, Custom);
185 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
186 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
188 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
189 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
190 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
191 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
192
193 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
194 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
195 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
196
197 setOperationAction(ISD::FREM, MVT::f32, Expand);
198 setOperationAction(ISD::FREM, MVT::f64, Expand);
199 setOperationAction(ISD::FREM, MVT::f80, Expand);
200
201 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
202
203 // Custom lowering hooks are needed for XOR
204 // to fold it into CSINC/CSINV.
205 setOperationAction(ISD::XOR, MVT::i32, Custom);
206 setOperationAction(ISD::XOR, MVT::i64, Custom);
207
208 // Virtually no operation on f128 is legal, but LLVM can't expand them when
209 // there's a valid register class, so we need custom operations in most cases.
210 setOperationAction(ISD::FABS, MVT::f128, Expand);
211 setOperationAction(ISD::FADD, MVT::f128, Custom);
212 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
213 setOperationAction(ISD::FCOS, MVT::f128, Expand);
214 setOperationAction(ISD::FDIV, MVT::f128, Custom);
215 setOperationAction(ISD::FMA, MVT::f128, Expand);
216 setOperationAction(ISD::FMUL, MVT::f128, Custom);
217 setOperationAction(ISD::FNEG, MVT::f128, Expand);
218 setOperationAction(ISD::FPOW, MVT::f128, Expand);
219 setOperationAction(ISD::FREM, MVT::f128, Expand);
220 setOperationAction(ISD::FRINT, MVT::f128, Expand);
221 setOperationAction(ISD::FSIN, MVT::f128, Expand);
222 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
223 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
224 setOperationAction(ISD::FSUB, MVT::f128, Custom);
225 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
226 setOperationAction(ISD::SETCC, MVT::f128, Custom);
227 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
228 setOperationAction(ISD::SELECT, MVT::f128, Custom);
229 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
230 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
231
232 // Lowering for many of the conversions is actually specified by the non-f128
233 // type. The LowerXXX function will be trivial when f128 isn't involved.
234 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
235 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
236 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
237 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
238 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
239 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
241 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
243 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
244 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
245 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
246 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
247 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
248
249 // Variable arguments.
250 setOperationAction(ISD::VASTART, MVT::Other, Custom);
251 setOperationAction(ISD::VAARG, MVT::Other, Custom);
252 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
253 setOperationAction(ISD::VAEND, MVT::Other, Expand);
254
255 // Variable-sized objects.
256 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
257 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
258
259 if (Subtarget->isTargetWindows())
260 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
261 else
262 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
263
264 // Constant pool entries
265 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
266
267 // BlockAddress
268 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
269
270 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
271 setOperationAction(ISD::ADDC, MVT::i32, Custom);
272 setOperationAction(ISD::ADDE, MVT::i32, Custom);
273 setOperationAction(ISD::SUBC, MVT::i32, Custom);
274 setOperationAction(ISD::SUBE, MVT::i32, Custom);
275 setOperationAction(ISD::ADDC, MVT::i64, Custom);
276 setOperationAction(ISD::ADDE, MVT::i64, Custom);
277 setOperationAction(ISD::SUBC, MVT::i64, Custom);
278 setOperationAction(ISD::SUBE, MVT::i64, Custom);
279
280 // AArch64 lacks both left-rotate and popcount instructions.
281 setOperationAction(ISD::ROTL, MVT::i32, Expand);
282 setOperationAction(ISD::ROTL, MVT::i64, Expand);
283 for (MVT VT : MVT::vector_valuetypes()) {
284 setOperationAction(ISD::ROTL, VT, Expand);
285 setOperationAction(ISD::ROTR, VT, Expand);
286 }
287
288 // AArch64 doesn't have {U|S}MUL_LOHI.
289 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
290 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
291
292 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
293 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
294
295 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
296 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
297 for (MVT VT : MVT::vector_valuetypes()) {
298 setOperationAction(ISD::SDIVREM, VT, Expand);
299 setOperationAction(ISD::UDIVREM, VT, Expand);
300 }
301 setOperationAction(ISD::SREM, MVT::i32, Expand);
302 setOperationAction(ISD::SREM, MVT::i64, Expand);
303 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
304 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
305 setOperationAction(ISD::UREM, MVT::i32, Expand);
306 setOperationAction(ISD::UREM, MVT::i64, Expand);
307
308 // Custom lower Add/Sub/Mul with overflow.
309 setOperationAction(ISD::SADDO, MVT::i32, Custom);
310 setOperationAction(ISD::SADDO, MVT::i64, Custom);
311 setOperationAction(ISD::UADDO, MVT::i32, Custom);
312 setOperationAction(ISD::UADDO, MVT::i64, Custom);
313 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
314 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
315 setOperationAction(ISD::USUBO, MVT::i32, Custom);
316 setOperationAction(ISD::USUBO, MVT::i64, Custom);
317 setOperationAction(ISD::SMULO, MVT::i32, Custom);
318 setOperationAction(ISD::SMULO, MVT::i64, Custom);
319 setOperationAction(ISD::UMULO, MVT::i32, Custom);
320 setOperationAction(ISD::UMULO, MVT::i64, Custom);
321
322 setOperationAction(ISD::FSIN, MVT::f32, Expand);
323 setOperationAction(ISD::FSIN, MVT::f64, Expand);
324 setOperationAction(ISD::FCOS, MVT::f32, Expand);
325 setOperationAction(ISD::FCOS, MVT::f64, Expand);
326 setOperationAction(ISD::FPOW, MVT::f32, Expand);
327 setOperationAction(ISD::FPOW, MVT::f64, Expand);
328 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
329 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
330 if (Subtarget->hasFullFP16())
331 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
332 else
333 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
334
335 setOperationAction(ISD::FREM, MVT::f16, Promote);
336 setOperationAction(ISD::FREM, MVT::v4f16, Promote);
337 setOperationAction(ISD::FREM, MVT::v8f16, Promote);
338 setOperationAction(ISD::FPOW, MVT::f16, Promote);
339 setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
340 setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
341 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
342 setOperationAction(ISD::FCOS, MVT::f16, Promote);
343 setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
344 setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
345 setOperationAction(ISD::FSIN, MVT::f16, Promote);
346 setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
347 setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
348 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
349 setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
350 setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
351 setOperationAction(ISD::FEXP, MVT::f16, Promote);
352 setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
353 setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
354 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
355 setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
356 setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
357 setOperationAction(ISD::FLOG, MVT::f16, Promote);
358 setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
359 setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
360 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
361 setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
362 setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
363 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
364 setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
365 setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
366
367 if (!Subtarget->hasFullFP16()) {
368 setOperationAction(ISD::SELECT, MVT::f16, Promote);
369 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
370 setOperationAction(ISD::SETCC, MVT::f16, Promote);
371 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
372 setOperationAction(ISD::FADD, MVT::f16, Promote);
373 setOperationAction(ISD::FSUB, MVT::f16, Promote);
374 setOperationAction(ISD::FMUL, MVT::f16, Promote);
375 setOperationAction(ISD::FDIV, MVT::f16, Promote);
376 setOperationAction(ISD::FMA, MVT::f16, Promote);
377 setOperationAction(ISD::FNEG, MVT::f16, Promote);
378 setOperationAction(ISD::FABS, MVT::f16, Promote);
379 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
380 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
381 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
382 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
383 setOperationAction(ISD::FRINT, MVT::f16, Promote);
384 setOperationAction(ISD::FROUND, MVT::f16, Promote);
385 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
386 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
387 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
388 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
389 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
390
391 // promote v4f16 to v4f32 when that is known to be safe.
392 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
393 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
394 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
395 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
396 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
397 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
398 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
399 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
400 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
401 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
402 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
403 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
404
405 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
406 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
407 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
408 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
409 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
410 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
411 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
412 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
413 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
414 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
415 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
416 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
417 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
418 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
419 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
420
421 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
422 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
423 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
424 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
425 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
426 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
427 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
428 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
429 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
430 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
431 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
432 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
433 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
434 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
435 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
436 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
437 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
438 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
439 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
440 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
441 }
442
443 // AArch64 has implementations of a lot of rounding-like FP operations.
444 for (MVT Ty : {MVT::f32, MVT::f64}) {
445 setOperationAction(ISD::FFLOOR, Ty, Legal);
446 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
447 setOperationAction(ISD::FCEIL, Ty, Legal);
448 setOperationAction(ISD::FRINT, Ty, Legal);
449 setOperationAction(ISD::FTRUNC, Ty, Legal);
450 setOperationAction(ISD::FROUND, Ty, Legal);
451 setOperationAction(ISD::FMINNUM, Ty, Legal);
452 setOperationAction(ISD::FMAXNUM, Ty, Legal);
453 setOperationAction(ISD::FMINIMUM, Ty, Legal);
454 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
455 }
456
457 if (Subtarget->hasFullFP16()) {
458 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
459 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
460 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
461 setOperationAction(ISD::FRINT, MVT::f16, Legal);
462 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
463 setOperationAction(ISD::FROUND, MVT::f16, Legal);
464 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
465 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
466 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
467 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
468 }
469
470 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
471
472 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
473
474 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
477 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
478 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
479
480 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
481 // This requires the Performance Monitors extension.
482 if (Subtarget->hasPerfMon())
483 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
484
485 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
486 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
487 // Issue __sincos_stret if available.
488 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
489 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
490 } else {
491 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
492 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
493 }
494
495 // Make floating-point constants legal for the large code model, so they don't
496 // become loads from the constant pool.
497 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
498 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
499 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
500 }
501
502 // AArch64 does not have floating-point extending loads, i1 sign-extending
503 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
504 for (MVT VT : MVT::fp_valuetypes()) {
505 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
506 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
507 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
508 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
509 }
510 for (MVT VT : MVT::integer_valuetypes())
511 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
512
513 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
514 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
515 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
516 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
517 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
518 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
519 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
520
521 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
522 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
523
524 // Indexed loads and stores are supported.
525 for (unsigned im = (unsigned)ISD::PRE_INC;
526 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
527 setIndexedLoadAction(im, MVT::i8, Legal);
528 setIndexedLoadAction(im, MVT::i16, Legal);
529 setIndexedLoadAction(im, MVT::i32, Legal);
530 setIndexedLoadAction(im, MVT::i64, Legal);
531 setIndexedLoadAction(im, MVT::f64, Legal);
532 setIndexedLoadAction(im, MVT::f32, Legal);
533 setIndexedLoadAction(im, MVT::f16, Legal);
534 setIndexedStoreAction(im, MVT::i8, Legal);
535 setIndexedStoreAction(im, MVT::i16, Legal);
536 setIndexedStoreAction(im, MVT::i32, Legal);
537 setIndexedStoreAction(im, MVT::i64, Legal);
538 setIndexedStoreAction(im, MVT::f64, Legal);
539 setIndexedStoreAction(im, MVT::f32, Legal);
540 setIndexedStoreAction(im, MVT::f16, Legal);
541 }
542
543 // Trap.
544 setOperationAction(ISD::TRAP, MVT::Other, Legal);
545
546 // We combine OR nodes for bitfield operations.
547 setTargetDAGCombine(ISD::OR);
548
549 // Vector add and sub nodes may conceal a high-half opportunity.
550 // Also, try to fold ADD into CSINC/CSINV.
551 setTargetDAGCombine(ISD::ADD);
552 setTargetDAGCombine(ISD::SUB);
553 setTargetDAGCombine(ISD::SRL);
554 setTargetDAGCombine(ISD::XOR);
555 setTargetDAGCombine(ISD::SINT_TO_FP);
556 setTargetDAGCombine(ISD::UINT_TO_FP);
557
558 setTargetDAGCombine(ISD::FP_TO_SINT);
559 setTargetDAGCombine(ISD::FP_TO_UINT);
560 setTargetDAGCombine(ISD::FDIV);
561
562 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
563
564 setTargetDAGCombine(ISD::ANY_EXTEND);
565 setTargetDAGCombine(ISD::ZERO_EXTEND);
566 setTargetDAGCombine(ISD::SIGN_EXTEND);
567 setTargetDAGCombine(ISD::BITCAST);
568 setTargetDAGCombine(ISD::CONCAT_VECTORS);
569 setTargetDAGCombine(ISD::STORE);
570 if (Subtarget->supportsAddressTopByteIgnored())
571 setTargetDAGCombine(ISD::LOAD);
572
573 setTargetDAGCombine(ISD::MUL);
574
575 setTargetDAGCombine(ISD::SELECT);
576 setTargetDAGCombine(ISD::VSELECT);
577
578 setTargetDAGCombine(ISD::INTRINSIC_VOID);
579 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
580 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
581
582 setTargetDAGCombine(ISD::GlobalAddress);
583
584 // In case of strict alignment, avoid an excessive number of byte wide stores.
585 MaxStoresPerMemsetOptSize = 8;
586 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
587 ? MaxStoresPerMemsetOptSize : 32;
588
589 MaxGluedStoresPerMemcpy = 4;
590 MaxStoresPerMemcpyOptSize = 4;
591 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
592 ? MaxStoresPerMemcpyOptSize : 16;
593
594 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
595
596 setStackPointerRegisterToSaveRestore(AArch64::SP);
597
598 setSchedulingPreference(Sched::Hybrid);
599
600 EnableExtLdPromotion = true;
601
602 // Set required alignment.
603 setMinFunctionAlignment(2);
604 // Set preferred alignments.
605 setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
606 setPrefLoopAlignment(STI.getPrefLoopAlignment());
607
608 // Only change the limit for entries in a jump table if specified by
609 // the subtarget, but not at the command line.
610 unsigned MaxJT = STI.getMaximumJumpTableSize();
611 if (MaxJT && getMaximumJumpTableSize() == 0)
612 setMaximumJumpTableSize(MaxJT);
613
614 setHasExtractBitsInsn(true);
615
616 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
617
618 if (Subtarget->hasNEON()) {
619 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
620 // silliness like this:
621 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
622 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
623 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
624 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
625 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
626 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
627 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
628 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
629 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
630 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
631 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
632 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
633 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
634 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
635 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
636 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
637 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
638 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
639 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
640 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
641 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
642 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
643 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
644 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
645 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
646
647 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
648 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
649 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
650 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
651 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
652
653 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
654
655 // AArch64 doesn't have direct vector ->f32 conversion instructions for
656 // elements smaller than i32, so promote the input to i32 first.
657 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
658 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
659 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
660 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
661 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
662 // -> v8f16 conversions.
663 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
664 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
665 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
666 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
667 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
668 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
669 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
670 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
671 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
672 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
673 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
674 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
675 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
676
677 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
678 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
679
680 setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
681 setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
682 setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
683 setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
684 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
685 setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
686 setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
687 setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
688
689 // AArch64 doesn't have MUL.2d:
690 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
691 // Custom handling for some quad-vector types to detect MULL.
692 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
693 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
694 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
695
696 // Vector reductions
697 for (MVT VT : MVT::integer_valuetypes()) {
698 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
699 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
700 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
701 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
702 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
703 }
704 for (MVT VT : MVT::fp_valuetypes()) {
705 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
706 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
707 }
708
709 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
710 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
711 // Likewise, narrowing and extending vector loads/stores aren't handled
712 // directly.
713 for (MVT VT : MVT::vector_valuetypes()) {
714 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
715
716 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
717 setOperationAction(ISD::MULHS, VT, Legal);
718 setOperationAction(ISD::MULHU, VT, Legal);
719 } else {
720 setOperationAction(ISD::MULHS, VT, Expand);
721 setOperationAction(ISD::MULHU, VT, Expand);
722 }
723 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
724 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
725
726 setOperationAction(ISD::BSWAP, VT, Expand);
727
728 for (MVT InnerVT : MVT::vector_valuetypes()) {
729 setTruncStoreAction(VT, InnerVT, Expand);
730 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
731 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
732 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
733 }
734 }
735
736 // AArch64 has implementations of a lot of rounding-like FP operations.
737 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
738 setOperationAction(ISD::FFLOOR, Ty, Legal);
739 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
740 setOperationAction(ISD::FCEIL, Ty, Legal);
741 setOperationAction(ISD::FRINT, Ty, Legal);
742 setOperationAction(ISD::FTRUNC, Ty, Legal);
743 setOperationAction(ISD::FROUND, Ty, Legal);
744 }
745
746 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
747 }
748
749 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
750}
751
752void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
753 assert(VT.isVector() && "VT should be a vector type");
754
755 if (VT.isFloatingPoint()) {
756 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
757 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
758 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
759 }
760
761 // Mark vector float intrinsics as expand.
762 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
763 setOperationAction(ISD::FSIN, VT, Expand);
764 setOperationAction(ISD::FCOS, VT, Expand);
765 setOperationAction(ISD::FPOW, VT, Expand);
766 setOperationAction(ISD::FLOG, VT, Expand);
767 setOperationAction(ISD::FLOG2, VT, Expand);
768 setOperationAction(ISD::FLOG10, VT, Expand);
769 setOperationAction(ISD::FEXP, VT, Expand);
770 setOperationAction(ISD::FEXP2, VT, Expand);
771
772 // But we do support custom-lowering for FCOPYSIGN.
773 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
774 }
775
776 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
777 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
778 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
779 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
780 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
781 setOperationAction(ISD::SRA, VT, Custom);
782 setOperationAction(ISD::SRL, VT, Custom);
783 setOperationAction(ISD::SHL, VT, Custom);
784 setOperationAction(ISD::AND, VT, Custom);
785 setOperationAction(ISD::OR, VT, Custom);
786 setOperationAction(ISD::SETCC, VT, Custom);
787 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
788
789 setOperationAction(ISD::SELECT, VT, Expand);
790 setOperationAction(ISD::SELECT_CC, VT, Expand);
791 setOperationAction(ISD::VSELECT, VT, Expand);
792 for (MVT InnerVT : MVT::all_valuetypes())
793 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
794
795 // CNT supports only B element sizes, then use UADDLP to widen.
796 if (VT != MVT::v8i8 && VT != MVT::v16i8)
797 setOperationAction(ISD::CTPOP, VT, Custom);
798
799 setOperationAction(ISD::UDIV, VT, Expand);
800 setOperationAction(ISD::SDIV, VT, Expand);
801 setOperationAction(ISD::UREM, VT, Expand);
802 setOperationAction(ISD::SREM, VT, Expand);
803 setOperationAction(ISD::FREM, VT, Expand);
804
805 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
806 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
807
808 if (!VT.isFloatingPoint())
809 setOperationAction(ISD::ABS, VT, Legal);
810
811 // [SU][MIN|MAX] are available for all NEON types apart from i64.
812 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
813 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
814 setOperationAction(Opcode, VT, Legal);
815
816 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
817 if (VT.isFloatingPoint() &&
818 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
819 for (unsigned Opcode :
820 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
821 setOperationAction(Opcode, VT, Legal);
822
823 if (Subtarget->isLittleEndian()) {
824 for (unsigned im = (unsigned)ISD::PRE_INC;
825 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
826 setIndexedLoadAction(im, VT, Legal);
827 setIndexedStoreAction(im, VT, Legal);
828 }
829 }
830}
831
832void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
833 addRegisterClass(VT, &AArch64::FPR64RegClass);
834 addTypeForNEON(VT, MVT::v2i32);
835}
836
837void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
838 addRegisterClass(VT, &AArch64::FPR128RegClass);
839 addTypeForNEON(VT, MVT::v4i32);
840}
841
842EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
843 EVT VT) const {
844 if (!VT.isVector())
845 return MVT::i32;
846 return VT.changeVectorElementTypeToInteger();
847}
848
849static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
850 const APInt &Demanded,
851 TargetLowering::TargetLoweringOpt &TLO,
852 unsigned NewOpc) {
853 uint64_t OldImm = Imm, NewImm, Enc;
854 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
855
856 // Return if the immediate is already all zeros, all ones, a bimm32 or a
857 // bimm64.
858 if (Imm == 0 || Imm == Mask ||
859 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
860 return false;
861
862 unsigned EltSize = Size;
863 uint64_t DemandedBits = Demanded.getZExtValue();
864
865 // Clear bits that are not demanded.
866 Imm &= DemandedBits;
867
868 while (true) {
869 // The goal here is to set the non-demanded bits in a way that minimizes
870 // the number of switching between 0 and 1. In order to achieve this goal,
871 // we set the non-demanded bits to the value of the preceding demanded bits.
872 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
873 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
874 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
875 // The final result is 0b11000011.
876 uint64_t NonDemandedBits = ~DemandedBits;
877 uint64_t InvertedImm = ~Imm & DemandedBits;
878 uint64_t RotatedImm =
879 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
880 NonDemandedBits;
881 uint64_t Sum = RotatedImm + NonDemandedBits;
882 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
883 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
884 NewImm = (Imm | Ones) & Mask;
885
886 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
887 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
888 // we halve the element size and continue the search.
889 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
890 break;
891
892 // We cannot shrink the element size any further if it is 2-bits.
893 if (EltSize == 2)
894 return false;
895
896 EltSize /= 2;
897 Mask >>= EltSize;
898 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
899
900 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
901 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
902 return false;
903
904 // Merge the upper and lower halves of Imm and DemandedBits.
905 Imm |= Hi;
906 DemandedBits |= DemandedBitsHi;
907 }
908
909 ++NumOptimizedImms;
910
911 // Replicate the element across the register width.
912 while (EltSize < Size) {
913 NewImm |= NewImm << EltSize;
914 EltSize *= 2;
915 }
916
917 (void)OldImm;
918 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
919 "demanded bits should never be altered");
920 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
921
922 // Create the new constant immediate node.
923 EVT VT = Op.getValueType();
924 SDLoc DL(Op);
925 SDValue New;
926
927 // If the new constant immediate is all-zeros or all-ones, let the target
928 // independent DAG combine optimize this node.
929 if (NewImm == 0 || NewImm == OrigMask) {
930 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
931 TLO.DAG.getConstant(NewImm, DL, VT));
932 // Otherwise, create a machine node so that target independent DAG combine
933 // doesn't undo this optimization.
934 } else {
935 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
936 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
937 New = SDValue(
938 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
939 }
940
941 return TLO.CombineTo(Op, New);
942}
943
944bool AArch64TargetLowering::targetShrinkDemandedConstant(
945 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
946 // Delay this optimization to as late as possible.
947 if (!TLO.LegalOps)
948 return false;
949
950 if (!EnableOptimizeLogicalImm)
951 return false;
952
953 EVT VT = Op.getValueType();
954 if (VT.isVector())
955 return false;
956
957 unsigned Size = VT.getSizeInBits();
958 assert((Size == 32 || Size == 64) &&
959 "i32 or i64 is expected after legalization.");
960
961 // Exit early if we demand all bits.
962 if (Demanded.countPopulation() == Size)
963 return false;
964
965 unsigned NewOpc;
966 switch (Op.getOpcode()) {
967 default:
968 return false;
969 case ISD::AND:
970 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
971 break;
972 case ISD::OR:
973 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
974 break;
975 case ISD::XOR:
976 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
977 break;
978 }
979 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
980 if (!C)
981 return false;
982 uint64_t Imm = C->getZExtValue();
983 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
984}
985
986/// computeKnownBitsForTargetNode - Determine which of the bits specified in
987/// Mask are known to be either zero or one and return them Known.
988void AArch64TargetLowering::computeKnownBitsForTargetNode(
989 const SDValue Op, KnownBits &Known,
990 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
991 switch (Op.getOpcode()) {
992 default:
993 break;
994 case AArch64ISD::CSEL: {
995 KnownBits Known2;
996 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
997 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
998 Known.Zero &= Known2.Zero;
999 Known.One &= Known2.One;
1000 break;
1001 }
1002 case ISD::INTRINSIC_W_CHAIN: {
1003 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1004 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1005 switch (IntID) {
1006 default: return;
1007 case Intrinsic::aarch64_ldaxr:
1008 case Intrinsic::aarch64_ldxr: {
1009 unsigned BitWidth = Known.getBitWidth();
1010 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1011 unsigned MemBits = VT.getScalarSizeInBits();
1012 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1013 return;
1014 }
1015 }
1016 break;
1017 }
1018 case ISD::INTRINSIC_WO_CHAIN:
1019 case ISD::INTRINSIC_VOID: {
1020 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1021 switch (IntNo) {
1022 default:
1023 break;
1024 case Intrinsic::aarch64_neon_umaxv:
1025 case Intrinsic::aarch64_neon_uminv: {
1026 // Figure out the datatype of the vector operand. The UMINV instruction
1027 // will zero extend the result, so we can mark as known zero all the
1028 // bits larger than the element datatype. 32-bit or larger doesn't need
1029 // this as those are legal types and will be handled by isel directly.
1030 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1031 unsigned BitWidth = Known.getBitWidth();
1032 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1033 assert(BitWidth >= 8 && "Unexpected width!");
1034 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1035 Known.Zero |= Mask;
1036 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1037 assert(BitWidth >= 16 && "Unexpected width!");
1038 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1039 Known.Zero |= Mask;
1040 }
1041 break;
1042 } break;
1043 }
1044 }
1045 }
1046}
1047
1048MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1049 EVT) const {
1050 return MVT::i64;
1051}
1052
1053bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1054 unsigned AddrSpace,
1055 unsigned Align,
1056 bool *Fast) const {
1057 if (Subtarget->requiresStrictAlign())
1058 return false;
1059
1060 if (Fast) {
1061 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1062 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1063 // See comments in performSTORECombine() for more details about
1064 // these conditions.
1065
1066 // Code that uses clang vector extensions can mark that it
1067 // wants unaligned accesses to be treated as fast by
1068 // underspecifying alignment to be 1 or 2.
1069 Align <= 2 ||
1070
1071 // Disregard v2i64. Memcpy lowering produces those and splitting
1072 // them regresses performance on micro-benchmarks and olden/bh.
1073 VT == MVT::v2i64;
1074 }
1075 return true;
1076}
1077
1078FastISel *
1079AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1080 const TargetLibraryInfo *libInfo) const {
1081 return AArch64::createFastISel(funcInfo, libInfo);
1082}
1083
1084const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1085 switch ((AArch64ISD::NodeType)Opcode) {
1086 case AArch64ISD::FIRST_NUMBER: break;
1087 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1088 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1089 case AArch64ISD::ADR: return "AArch64ISD::ADR";
1090 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1091 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1092 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1093 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1094 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1095 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1096 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1097 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1098 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1099 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1100 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1101 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1102 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1103 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1104 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1105 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1106 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1107 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1108 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1109 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1110 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1111 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1112 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1113 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1114 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1115 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1116 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1117 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1118 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1119 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1120 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1121 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1122 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1123 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1124 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1125 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1126 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1127 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1128 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1129 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1130 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1131 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1132 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1133 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1134 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1135 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1136 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1137 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1138 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1139 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1140 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1141 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1142 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1143 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1144 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1145 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1146 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1147 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1148 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1149 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1150 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1151 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1152 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1153 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1154 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1155 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1156 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1157 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1158 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1159 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1160 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1161 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1162 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1163 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1164 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1165 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1166 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1167 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1168 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1169 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1170 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1171 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1172 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1173 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1174 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1175 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1176 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1177 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1178 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1179 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1180 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1181 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1182 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1183 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1184 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1185 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1186 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1187 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1188 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1189 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1190 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1191 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1192 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1193 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1194 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1195 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1196 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1197 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1198 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1199 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1200 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1201 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1202 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1203 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1204 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1205 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1206 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1207 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1208 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1209 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1210 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1211 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1212 }
1213 return nullptr;
1214}
1215
1216MachineBasicBlock *
1217AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1218 MachineBasicBlock *MBB) const {
1219 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1220 // phi node:
1221
1222 // OrigBB:
1223 // [... previous instrs leading to comparison ...]
1224 // b.ne TrueBB
1225 // b EndBB
1226 // TrueBB:
1227 // ; Fallthrough
1228 // EndBB:
1229 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1230
1231 MachineFunction *MF = MBB->getParent();
1232 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1233 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1234 DebugLoc DL = MI.getDebugLoc();
1235 MachineFunction::iterator It = ++MBB->getIterator();
1236
1237 unsigned DestReg = MI.getOperand(0).getReg();
1238 unsigned IfTrueReg = MI.getOperand(1).getReg();
1239 unsigned IfFalseReg = MI.getOperand(2).getReg();
1240 unsigned CondCode = MI.getOperand(3).getImm();
1241 bool NZCVKilled = MI.getOperand(4).isKill();
1242
1243 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1244 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1245 MF->insert(It, TrueBB);
1246 MF->insert(It, EndBB);
1247
1248 // Transfer rest of current basic-block to EndBB
1249 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1250 MBB->end());
1251 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1252
1253 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1254 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1255 MBB->addSuccessor(TrueBB);
1256 MBB->addSuccessor(EndBB);
1257
1258 // TrueBB falls through to the end.
1259 TrueBB->addSuccessor(EndBB);
1260
1261 if (!NZCVKilled) {
1262 TrueBB->addLiveIn(AArch64::NZCV);
1263 EndBB->addLiveIn(AArch64::NZCV);
1264 }
1265
1266 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1267 .addReg(IfTrueReg)
1268 .addMBB(TrueBB)
1269 .addReg(IfFalseReg)
1270 .addMBB(MBB);
1271
1272 MI.eraseFromParent();
1273 return EndBB;
1274}
1275
1276MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
1277 MachineInstr &MI, MachineBasicBlock *BB) const {
1278 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
1279 BB->getParent()->getFunction().getPersonalityFn())) &&
1280 "SEH does not use catchret!");
1281 return BB;
1282}
1283
1284MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
1285 MachineInstr &MI, MachineBasicBlock *BB) const {
1286 MI.eraseFromParent();
1287 return BB;
1288}
1289
1290MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1291 MachineInstr &MI, MachineBasicBlock *BB) const {
1292 switch (MI.getOpcode()) {
1293 default:
1294#ifndef NDEBUG
1295 MI.dump();
1296#endif
1297 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1297)
;
1298
1299 case AArch64::F128CSEL:
1300 return EmitF128CSEL(MI, BB);
1301
1302 case TargetOpcode::STACKMAP:
1303 case TargetOpcode::PATCHPOINT:
1304 return emitPatchPoint(MI, BB);
1305
1306 case AArch64::CATCHRET:
1307 return EmitLoweredCatchRet(MI, BB);
1308 case AArch64::CATCHPAD:
1309 return EmitLoweredCatchPad(MI, BB);
1310 }
1311}
1312
1313//===----------------------------------------------------------------------===//
1314// AArch64 Lowering private implementation.
1315//===----------------------------------------------------------------------===//
1316
1317//===----------------------------------------------------------------------===//
1318// Lowering Code
1319//===----------------------------------------------------------------------===//
1320
1321/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1322/// CC
1323static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1324 switch (CC) {
1325 default:
1326 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1326)
;
1327 case ISD::SETNE:
1328 return AArch64CC::NE;
1329 case ISD::SETEQ:
1330 return AArch64CC::EQ;
1331 case ISD::SETGT:
1332 return AArch64CC::GT;
1333 case ISD::SETGE:
1334 return AArch64CC::GE;
1335 case ISD::SETLT:
1336 return AArch64CC::LT;
1337 case ISD::SETLE:
1338 return AArch64CC::LE;
1339 case ISD::SETUGT:
1340 return AArch64CC::HI;
1341 case ISD::SETUGE:
1342 return AArch64CC::HS;
1343 case ISD::SETULT:
1344 return AArch64CC::LO;
1345 case ISD::SETULE:
1346 return AArch64CC::LS;
1347 }
1348}
1349
1350/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1351static void changeFPCCToAArch64CC(ISD::CondCode CC,
1352 AArch64CC::CondCode &CondCode,
1353 AArch64CC::CondCode &CondCode2) {
1354 CondCode2 = AArch64CC::AL;
1355 switch (CC) {
1356 default:
1357 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1357)
;
1358 case ISD::SETEQ:
1359 case ISD::SETOEQ:
1360 CondCode = AArch64CC::EQ;
1361 break;
1362 case ISD::SETGT:
1363 case ISD::SETOGT:
1364 CondCode = AArch64CC::GT;
1365 break;
1366 case ISD::SETGE:
1367 case ISD::SETOGE:
1368 CondCode = AArch64CC::GE;
1369 break;
1370 case ISD::SETOLT:
1371 CondCode = AArch64CC::MI;
1372 break;
1373 case ISD::SETOLE:
1374 CondCode = AArch64CC::LS;
1375 break;
1376 case ISD::SETONE:
1377 CondCode = AArch64CC::MI;
1378 CondCode2 = AArch64CC::GT;
1379 break;
1380 case ISD::SETO:
1381 CondCode = AArch64CC::VC;
1382 break;
1383 case ISD::SETUO:
1384 CondCode = AArch64CC::VS;
1385 break;
1386 case ISD::SETUEQ:
1387 CondCode = AArch64CC::EQ;
1388 CondCode2 = AArch64CC::VS;
1389 break;
1390 case ISD::SETUGT:
1391 CondCode = AArch64CC::HI;
1392 break;
1393 case ISD::SETUGE:
1394 CondCode = AArch64CC::PL;
1395 break;
1396 case ISD::SETLT:
1397 case ISD::SETULT:
1398 CondCode = AArch64CC::LT;
1399 break;
1400 case ISD::SETLE:
1401 case ISD::SETULE:
1402 CondCode = AArch64CC::LE;
1403 break;
1404 case ISD::SETNE:
1405 case ISD::SETUNE:
1406 CondCode = AArch64CC::NE;
1407 break;
1408 }
1409}
1410
1411/// Convert a DAG fp condition code to an AArch64 CC.
1412/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1413/// should be AND'ed instead of OR'ed.
1414static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1415 AArch64CC::CondCode &CondCode,
1416 AArch64CC::CondCode &CondCode2) {
1417 CondCode2 = AArch64CC::AL;
1418 switch (CC) {
1419 default:
1420 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1421 assert(CondCode2 == AArch64CC::AL);
1422 break;
1423 case ISD::SETONE:
1424 // (a one b)
1425 // == ((a olt b) || (a ogt b))
1426 // == ((a ord b) && (a une b))
1427 CondCode = AArch64CC::VC;
1428 CondCode2 = AArch64CC::NE;
1429 break;
1430 case ISD::SETUEQ:
1431 // (a ueq b)
1432 // == ((a uno b) || (a oeq b))
1433 // == ((a ule b) && (a uge b))
1434 CondCode = AArch64CC::PL;
1435 CondCode2 = AArch64CC::LE;
1436 break;
1437 }
1438}
1439
1440/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1441/// CC usable with the vector instructions. Fewer operations are available
1442/// without a real NZCV register, so we have to use less efficient combinations
1443/// to get the same effect.
1444static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1445 AArch64CC::CondCode &CondCode,
1446 AArch64CC::CondCode &CondCode2,
1447 bool &Invert) {
1448 Invert = false;
1449 switch (CC) {
1450 default:
1451 // Mostly the scalar mappings work fine.
1452 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1453 break;
1454 case ISD::SETUO:
1455 Invert = true;
1456 LLVM_FALLTHROUGH;
1457 case ISD::SETO:
1458 CondCode = AArch64CC::MI;
1459 CondCode2 = AArch64CC::GE;
1460 break;
1461 case ISD::SETUEQ:
1462 case ISD::SETULT:
1463 case ISD::SETULE:
1464 case ISD::SETUGT:
1465 case ISD::SETUGE:
1466 // All of the compare-mask comparisons are ordered, but we can switch
1467 // between the two by a double inversion. E.g. ULE == !OGT.
1468 Invert = true;
1469 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1470 break;
1471 }
1472}
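// A concrete instance of the mapping above (illustrative): for ISD::SETULE
// there is no unordered compare-mask instruction, so the code sets
// Invert = true and maps the inverted condition SETOGT to AArch64CC::GT,
// i.e. "a ule b" is materialised as NOT (a ogt b).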
1473
1474static bool isLegalArithImmed(uint64_t C) {
1475 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1476 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1477 LLVM_DEBUG(dbgs() << "Is imm " << C
1478 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1479 return IsLegal;
1480}
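// A few sample values for the predicate above (assuming the usual AArch64
// arithmetic-immediate forms: a 12-bit value, optionally shifted left by 12):
//   isLegalArithImmed(0xFFF)    -> true   (12-bit value)
//   isLegalArithImmed(0x123000) -> true   (12-bit value shifted by 12)
//   isLegalArithImmed(0x1001)   -> false  (13 significant bits, low 12 not zero)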
1481
1482// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
1483// the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
1484// can be set differently by this operation. It comes down to whether
1485// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1486// everything is fine. If not then the optimization is wrong. Thus general
1487// comparisons are only valid if op2 != 0.
1488//
1489// So, finally, the only LLVM-native comparisons that don't mention C and V
1490// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1491// the absence of information about op2.
1492static bool isCMN(SDValue Op, ISD::CondCode CC) {
1493 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1494 (CC == ISD::SETEQ || CC == ISD::SETNE);
1495}
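// For example, given "t = (sub 0, b)", a DAG node (setcc eq, a, t) may be
// lowered with CMN a, b thanks to the check above, whereas (setcc lt, a, t)
// keeps the plain CMP, since only EQ/NE are immune to the C/V differences
// noted in the comment.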
1496
1497static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1498 const SDLoc &dl, SelectionDAG &DAG) {
1499 EVT VT = LHS.getValueType();
1500 const bool FullFP16 =
1501 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1502
1503 if (VT.isFloatingPoint()) {
1504 assert(VT != MVT::f128);
1505 if (VT == MVT::f16 && !FullFP16) {
1506 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1507 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1508 VT = MVT::f32;
1509 }
1510 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1511 }
1512
1513 // The CMP instruction is just an alias for SUBS, and representing it as
1514 // SUBS means that it's possible to get CSE with subtract operations.
1515 // A later phase can perform the optimization of setting the destination
1516 // register to WZR/XZR if it ends up being unused.
1517 unsigned Opcode = AArch64ISD::SUBS;
1518
1519 if (isCMN(RHS, CC)) {
1520 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
1521 Opcode = AArch64ISD::ADDS;
1522 RHS = RHS.getOperand(1);
1523 } else if (isCMN(LHS, CC)) {
1524 // As we are looking for EQ/NE compares, the operands can be commuted; can
1525 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
1526 Opcode = AArch64ISD::ADDS;
1527 LHS = LHS.getOperand(1);
1528 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1529 !isUnsignedIntSetCC(CC)) {
1530 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1531 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1532 // of the signed comparisons.
1533 Opcode = AArch64ISD::ANDS;
1534 RHS = LHS.getOperand(1);
1535 LHS = LHS.getOperand(0);
1536 }
1537
1538 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1539 .getValue(1);
1540}
1541
1542/// \defgroup AArch64CCMP CMP;CCMP matching
1543///
1544/// These functions deal with the formation of CMP;CCMP;... sequences.
1545/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1546/// a comparison. They set the NZCV flags to a predefined value if their
1547/// predicate is false. This allows us to express arbitrary conjunctions, for
1548/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1549/// which can be expressed as:
1550/// cmp A
1551/// ccmp B, inv(CB), CA
1552/// check for CB flags
1553///
1554/// This naturally lets us implement chains of AND operations with SETCC
1555/// operands. And we can even implement some other situations by transforming
1556/// them:
1557/// - We can implement (NEG SETCC) i.e. negating a single comparison by
1558/// negating the flags used in CCMP/FCCMP operations.
1559/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1560/// by negating the flags we test for afterwards. i.e.
1561/// NEG (CMP CCMP CCCMP ...) can be implemented.
1562/// - Note that we can only ever negate all previously processed results.
1563/// What we cannot implement by flipping the flags to test is a negation
1564/// of two sub-trees (because the negation affects all sub-trees emitted so
1565/// far, so the 2nd sub-tree we emit would also affect the first).
1566/// With those tools we can implement some OR operations:
1567/// - (OR (SETCC A) (SETCC B)) can be implemented via:
1568/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1569/// - After transforming OR to NEG/AND combinations we may be able to use NEG
1570/// elimination rules from earlier to implement the whole thing as a
1571/// CCMP/FCCMP chain.
1572///
1573/// As a complete example:
1574///  or (or (setCA (cmp A)) (setCB (cmp B)))
1575///     (and (setCC (cmp C)) (setCD (cmp D)))
1576/// can be reassociated to:
1577///  or (and (setCC (cmp C)) (setCD (cmp D)))
1578///     (or (setCA (cmp A)) (setCB (cmp B)))
1579/// can be transformed to:
1580///  not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1581///           (and (not (setCA (cmp A))) (not (setCB (cmp B))))))
1582/// which can be implemented as:
1583/// cmp C
1584/// ccmp D, inv(CD), CC
1585/// ccmp A, CA, inv(CD)
1586/// ccmp B, CB, inv(CA)
1587/// check for CB flags
1588///
1589/// A counterexample is "or (and A B) (and C D)" which translates to
1590/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
1591/// can only implement 1 of the inner (not) operations, but not both!
1592/// @{
1593
1594/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1595static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1596 ISD::CondCode CC, SDValue CCOp,
1597 AArch64CC::CondCode Predicate,
1598 AArch64CC::CondCode OutCC,
1599 const SDLoc &DL, SelectionDAG &DAG) {
1600 unsigned Opcode = 0;
1601 const bool FullFP16 =
1602 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1603
1604 if (LHS.getValueType().isFloatingPoint()) {
1605 assert(LHS.getValueType() != MVT::f128);
1606 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1607 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1608 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1609 }
1610 Opcode = AArch64ISD::FCCMP;
1611 } else if (RHS.getOpcode() == ISD::SUB) {
1612 SDValue SubOp0 = RHS.getOperand(0);
1613 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1614 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1615 Opcode = AArch64ISD::CCMN;
1616 RHS = RHS.getOperand(1);
1617 }
1618 }
1619 if (Opcode == 0)
1620 Opcode = AArch64ISD::CCMP;
1621
1622 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1623 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1624 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1625 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1626 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1627}
1628
1629/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
1630/// expressed as a conjunction. See \ref AArch64CCMP.
1631/// \param CanNegate Set to true if we can negate the whole sub-tree just by
1632/// changing the conditions on the SETCC tests.
1633/// (this means we can call emitConjunctionRec() with
1634/// Negate==true on this sub-tree)
1635/// \param MustBeFirst Set to true if this subtree needs to be negated and we
1636/// cannot do the negation naturally. We are required to
1637/// emit the subtree first in this case.
1638/// \param WillNegate Is true if we are called when the result of this
1639/// subexpression must be negated. This happens when the
1640/// outer expression is an OR. We can use this fact to know
1641/// that we have a double negation (or (or ...) ...) that
1642/// can be implemented for free.
1643static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1644 bool &MustBeFirst, bool WillNegate,
1645 unsigned Depth = 0) {
1646 if (!Val.hasOneUse())
1647 return false;
1648 unsigned Opcode = Val->getOpcode();
1649 if (Opcode == ISD::SETCC) {
1650 if (Val->getOperand(0).getValueType() == MVT::f128)
1651 return false;
1652 CanNegate = true;
1653 MustBeFirst = false;
1654 return true;
1655 }
1656 // Protect against exponential runtime and stack overflow.
1657 if (Depth > 6)
1658 return false;
1659 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1660 bool IsOR = Opcode == ISD::OR;
1661 SDValue O0 = Val->getOperand(0);
1662 SDValue O1 = Val->getOperand(1);
1663 bool CanNegateL;
1664 bool MustBeFirstL;
1665 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
1666 return false;
1667 bool CanNegateR;
1668 bool MustBeFirstR;
1669 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
1670 return false;
1671
1672 if (MustBeFirstL && MustBeFirstR)
1673 return false;
1674
1675 if (IsOR) {
1676 // For an OR expression we need to be able to naturally negate at least
1677 // one side or we cannot do the transformation at all.
1678 if (!CanNegateL && !CanNegateR)
1679 return false;
1680 // If the result of the OR will be negated and we can naturally negate
1681 // the leaves, then this sub-tree as a whole negates naturally.
1682 CanNegate = WillNegate && CanNegateL && CanNegateR;
1683 // If we cannot naturally negate the whole sub-tree, then this must be
1684 // emitted first.
1685 MustBeFirst = !CanNegate;
1686 } else {
1687 assert(Opcode == ISD::AND && "Must be OR or AND");
1688 // We cannot naturally negate an AND operation.
1689 CanNegate = false;
1690 MustBeFirst = MustBeFirstL || MustBeFirstR;
1691 }
1692 return true;
1693 }
1694 return false;
1695}
1696
1697/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1698/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1699/// Tries to transform the given i1 producing node @p Val to a series of compare
1700/// and conditional compare operations. @returns an NZCV flags producing node
1701/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
1702/// the transformation was not possible.
1703/// \p Negate is true if we want this sub-tree to be negated just by changing
1704/// SETCC conditions.
1705static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
1706 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1707 AArch64CC::CondCode Predicate) {
1708 // We're at a tree leaf, produce a conditional comparison operation.
1709 unsigned Opcode = Val->getOpcode();
1710 if (Opcode == ISD::SETCC) {
1711 SDValue LHS = Val->getOperand(0);
1712 SDValue RHS = Val->getOperand(1);
1713 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1714 bool isInteger = LHS.getValueType().isInteger();
1715 if (Negate)
1716 CC = getSetCCInverse(CC, isInteger);
1717 SDLoc DL(Val);
1718 // Determine OutCC and handle FP special case.
1719 if (isInteger) {
1720 OutCC = changeIntCCToAArch64CC(CC);
1721 } else {
1722 assert(LHS.getValueType().isFloatingPoint());
1723 AArch64CC::CondCode ExtraCC;
1724 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1725 // Some floating point conditions can't be tested with a single condition
1726 // code. Construct an additional comparison in this case.
1727 if (ExtraCC != AArch64CC::AL) {
1728 SDValue ExtraCmp;
1729 if (!CCOp.getNode())
1730 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1731 else
1732 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1733 ExtraCC, DL, DAG);
1734 CCOp = ExtraCmp;
1735 Predicate = ExtraCC;
1736 }
1737 }
1738
1739 // Produce a normal comparison if we are first in the chain
1740 if (!CCOp)
1741 return emitComparison(LHS, RHS, CC, DL, DAG);
1742 // Otherwise produce a ccmp.
1743 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1744 DAG);
1745 }
1746 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
1747
1748 bool IsOR = Opcode == ISD::OR;
1749
1750 SDValue LHS = Val->getOperand(0);
1751 bool CanNegateL;
1752 bool MustBeFirstL;
1753 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
1754 assert(ValidL && "Valid conjunction/disjunction tree");
1755 (void)ValidL;
1756
1757 SDValue RHS = Val->getOperand(1);
1758 bool CanNegateR;
1759 bool MustBeFirstR;
1760 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
1761 assert(ValidR && "Valid conjunction/disjunction tree");
1762 (void)ValidR;
1763
1764 // Swap sub-tree that must come first to the right side.
1765 if (MustBeFirstL) {
1766 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
1767 std::swap(LHS, RHS);
1768 std::swap(CanNegateL, CanNegateR);
1769 std::swap(MustBeFirstL, MustBeFirstR);
1770 }
1771
1772 bool NegateR;
1773 bool NegateAfterR;
1774 bool NegateL;
1775 bool NegateAfterAll;
1776 if (Opcode == ISD::OR) {
1777 // Swap the sub-tree that we can negate naturally to the left.
1778 if (!CanNegateL) {
1779 assert(CanNegateR && "at least one side must be negatable");
1780 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
1781 assert(!Negate);
1782 std::swap(LHS, RHS);
1783 NegateR = false;
1784 NegateAfterR = true;
1785 } else {
1786 // Negate the left sub-tree if possible, otherwise negate the result.
1787 NegateR = CanNegateR;
1788 NegateAfterR = !CanNegateR;
1789 }
1790 NegateL = true;
1791 NegateAfterAll = !Negate;
1792 } else {
1793 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
1794 assert(!Negate && "Valid conjunction/disjunction tree");
1795
1796 NegateL = false;
1797 NegateR = false;
1798 NegateAfterR = false;
1799 NegateAfterAll = false;
1800 }
1801
1802 // Emit sub-trees.
1803 AArch64CC::CondCode RHSCC;
1804 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
1805 if (NegateAfterR)
1806 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1807 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
1808 if (NegateAfterAll)
1809 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1810 return CmpL;
1811}
1812
1813/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
1814/// In some cases this is even possible with OR operations in the expression.
1815/// See \ref AArch64CCMP.
1816/// \see emitConjunctionRec().
1817static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
1818 AArch64CC::CondCode &OutCC) {
1819 bool DummyCanNegate;
1820 bool DummyMustBeFirst;
1821 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
1822 return SDValue();
1823
1824 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
1825}
1826
1827/// @}
1828
1829/// Returns how profitable it is to fold a comparison's operand's shift and/or
1830/// extension operations.
1831static unsigned getCmpOperandFoldingProfit(SDValue Op) {
1832 auto isSupportedExtend = [&](SDValue V) {
1833 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
1834 return true;
1835
1836 if (V.getOpcode() == ISD::AND)
1837 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
1838 uint64_t Mask = MaskCst->getZExtValue();
1839 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
1840 }
1841
1842 return false;
1843 };
1844
1845 if (!Op.hasOneUse())
1846 return 0;
1847
1848 if (isSupportedExtend(Op))
1849 return 1;
1850
1851 unsigned Opc = Op.getOpcode();
1852 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
1853 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1854 uint64_t Shift = ShiftCst->getZExtValue();
1855 if (isSupportedExtend(Op.getOperand(0)))
1856 return (Shift <= 4) ? 2 : 1;
1857 EVT VT = Op.getValueType();
1858 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
1859 return 1;
1860 }
1861
1862 return 0;
1863}
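// Worked examples for the scoring above (each assuming a single use):
//   (shl (and x, 0xffff), 2)  -> 2  (extend plus small shift, e.g. "cmp w0, w1, uxth #2")
//   (shl x, 3)                -> 1  (plain in-range shifted-register operand)
//   (add x, y)                -> 0  (nothing to fold into the compare)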
1864
1865static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1866 SDValue &AArch64cc, SelectionDAG &DAG,
1867 const SDLoc &dl) {
1868 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1869 EVT VT = RHS.getValueType();
1870 uint64_t C = RHSC->getZExtValue();
1871 if (!isLegalArithImmed(C)) {
1872 // Constant does not fit, try adjusting it by one?
1873 switch (CC) {
1874 default:
1875 break;
1876 case ISD::SETLT:
1877 case ISD::SETGE:
1878 if ((VT == MVT::i32 && C != 0x80000000 &&
1879 isLegalArithImmed((uint32_t)(C - 1))) ||
1880 (VT == MVT::i64 && C != 0x80000000ULL &&
1881 isLegalArithImmed(C - 1ULL))) {
1882 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1883 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1884 RHS = DAG.getConstant(C, dl, VT);
1885 }
1886 break;
1887 case ISD::SETULT:
1888 case ISD::SETUGE:
1889 if ((VT == MVT::i32 && C != 0 &&
1890 isLegalArithImmed((uint32_t)(C - 1))) ||
1891 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1892 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1893 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1894 RHS = DAG.getConstant(C, dl, VT);
1895 }
1896 break;
1897 case ISD::SETLE:
1898 case ISD::SETGT:
1899 if ((VT == MVT::i32 && C != INT32_MAX &&
1900 isLegalArithImmed((uint32_t)(C + 1))) ||
1901 (VT == MVT::i64 && C != INT64_MAX &&
1902 isLegalArithImmed(C + 1ULL))) {
1903 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1904 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1905 RHS = DAG.getConstant(C, dl, VT);
1906 }
1907 break;
1908 case ISD::SETULE:
1909 case ISD::SETUGT:
1910 if ((VT == MVT::i32 && C != UINT32_MAX &&
1911 isLegalArithImmed((uint32_t)(C + 1))) ||
1912 (VT == MVT::i64 && C != UINT64_MAX &&
1913 isLegalArithImmed(C + 1ULL))) {
1914 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1915 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1916 RHS = DAG.getConstant(C, dl, VT);
1917 }
1918 break;
1919 }
1920 }
1921 }
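// Worked example of the immediate adjustment above: "x setlt 4097" is not
// directly encodable (4097 is not a legal arithmetic immediate), but since
// x < 4097 is equivalent to x <= 4096 and 4096 is encodable, the comparison
// is rewritten to "x setle 4096".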
1922
1923 // Comparisons are canonicalized so that the RHS operand is simpler than the
1924 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
1925 // can fold some shift+extend operations on the RHS operand, so swap the
1926 // operands if that can be done.
1927 //
1928 // For example:
1929 // lsl w13, w11, #1
1930 // cmp w13, w12
1931 // can be turned into:
1932 // cmp w12, w11, lsl #1
1933 if (!isa<ConstantSDNode>(RHS) ||
1934 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
1935 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
1936
1937 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
1938 std::swap(LHS, RHS);
1939 CC = ISD::getSetCCSwappedOperands(CC);
1940 }
1941 }
1942
1943 SDValue Cmp;
1944 AArch64CC::CondCode AArch64CC;
1945 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1946 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1947
1948 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1949 // For the i8 operand, the largest immediate is 255, so this can be easily
1950 // encoded in the compare instruction. For the i16 operand, however, the
1951 // largest immediate cannot be encoded in the compare.
1952 // Therefore, use a sign extending load and cmn to avoid materializing the
1953 // -1 constant. For example,
1954 // movz w1, #65535
1955 // ldrh w0, [x0, #0]
1956 // cmp w0, w1
1957 // >
1958 // ldrsh w0, [x0, #0]
1959 // cmn w0, #1
1960 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1961 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1962 // ensure both the LHS and RHS are truly zero extended and to make sure the
1963 // transformation is profitable.
1964 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1965 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1966 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1967 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1968 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1969 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1970 SDValue SExt =
1971 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1972 DAG.getValueType(MVT::i16));
1973 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1974 RHS.getValueType()),
1975 CC, dl, DAG);
1976 AArch64CC = changeIntCCToAArch64CC(CC);
1977 }
1978 }
1979
1980 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1981 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
1982 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1983 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1984 }
1985 }
1986 }
1987
1988 if (!Cmp) {
1989 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1990 AArch64CC = changeIntCCToAArch64CC(CC);
1991 }
1992 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1993 return Cmp;
1994}
1995
1996static std::pair<SDValue, SDValue>
1997getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1998 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1999 "Unsupported value type");
2000 SDValue Value, Overflow;
2001 SDLoc DL(Op);
2002 SDValue LHS = Op.getOperand(0);
2003 SDValue RHS = Op.getOperand(1);
2004 unsigned Opc = 0;
2005 switch (Op.getOpcode()) {
2006 default:
2007 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2007)
;
2008 case ISD::SADDO:
2009 Opc = AArch64ISD::ADDS;
2010 CC = AArch64CC::VS;
2011 break;
2012 case ISD::UADDO:
2013 Opc = AArch64ISD::ADDS;
2014 CC = AArch64CC::HS;
2015 break;
2016 case ISD::SSUBO:
2017 Opc = AArch64ISD::SUBS;
2018 CC = AArch64CC::VS;
2019 break;
2020 case ISD::USUBO:
2021 Opc = AArch64ISD::SUBS;
2022 CC = AArch64CC::LO;
2023 break;
2024 // Multiply needs a little bit of extra work.
2025 case ISD::SMULO:
2026 case ISD::UMULO: {
2027 CC = AArch64CC::NE;
2028 bool IsSigned = Op.getOpcode() == ISD::SMULO;
2029 if (Op.getValueType() == MVT::i32) {
2030 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2031 // For a 32 bit multiply with overflow check we want the instruction
2032 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2033 // need to generate the following pattern:
2034 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2035 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2036 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2037 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2038 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2039 DAG.getConstant(0, DL, MVT::i64));
2040 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2041 // operation. We need to clear out the upper 32 bits, because we used a
2042 // widening multiply that wrote all 64 bits. In the end this should be a
2043 // noop.
2044 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2045 if (IsSigned) {
2046 // The signed overflow check requires more than just a simple check for
2047 // any bit set in the upper 32 bits of the result. These bits could be
2048 // just the sign bits of a negative number. To perform the overflow
2049 // check we arithmetic-shift the low 32 bits of the result right by
2050 // 31 bits (replicating bit 31). Then we compare that to the upper 32 bits.
2051 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2052 DAG.getConstant(32, DL, MVT::i64));
2053 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2054 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2055 DAG.getConstant(31, DL, MVT::i64));
2056 // It is important that LowerBits is last, otherwise the arithmetic
2057 // shift will not be folded into the compare (SUBS).
2058 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2059 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2060 .getValue(1);
2061 } else {
2062 // The overflow check for unsigned multiply is easy. We only need to
2063 // check if any of the upper 32 bits are set. This can be done with a
2064 // CMP (shifted register). For that we need to generate the following
2065 // pattern:
2066 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2067 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2068 DAG.getConstant(32, DL, MVT::i64));
2069 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2070 Overflow =
2071 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2072 DAG.getConstant(0, DL, MVT::i64),
2073 UpperBits).getValue(1);
2074 }
2075 break;
2076 }
2077 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2078 // For the 64 bit multiply
2079 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2080 if (IsSigned) {
2081 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2082 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2083 DAG.getConstant(63, DL, MVT::i64));
2084 // It is important that LowerBits is last, otherwise the arithmetic
2085 // shift will not be folded into the compare (SUBS).
2086 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2087 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2088 .getValue(1);
2089 } else {
2090 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2091 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2092 Overflow =
2093 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2094 DAG.getConstant(0, DL, MVT::i64),
2095 UpperBits).getValue(1);
2096 }
2097 break;
2098 }
2099 } // switch (...)
2100
2101 if (Opc) {
2102 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2103
2104 // Emit the AArch64 operation with overflow check.
2105 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2106 Overflow = Value.getValue(1);
2107 }
2108 return std::make_pair(Value, Overflow);
2109}
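// For instance, an i32 llvm.sadd.with.overflow is mapped above to a single
// AArch64ISD::ADDS node whose second result carries NZCV, with CC set to
// AArch64CC::VS so the caller can select on the overflow (V) flag.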
2110
2111SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2112 RTLIB::Libcall Call) const {
2113 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2114 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
2115}
2116
2117// Returns true if the given Op is the overflow flag result of an overflow
2118// intrinsic operation.
2119static bool isOverflowIntrOpRes(SDValue Op) {
2120 unsigned Opc = Op.getOpcode();
2121 return (Op.getResNo() == 1 &&
2122 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2123 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2124}
2125
2126static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2127 SDValue Sel = Op.getOperand(0);
2128 SDValue Other = Op.getOperand(1);
2129 SDLoc dl(Sel);
2130
2131 // If the operand is an overflow checking operation, invert the condition
2132 // code and kill the Not operation. I.e., transform:
2133 // (xor (overflow_op_bool, 1))
2134 // -->
2135 // (csel 1, 0, invert(cc), overflow_op_bool)
2136 // ... which later gets transformed to just a cset instruction with an
2137 // inverted condition code, rather than a cset + eor sequence.
2138 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
2139 // Only lower legal XALUO ops.
2140 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2141 return SDValue();
2142
2143 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2144 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2145 AArch64CC::CondCode CC;
2146 SDValue Value, Overflow;
2147 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2148 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2149 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2150 CCVal, Overflow);
2151 }
2152 // If neither operand is a SELECT_CC, give up.
2153 if (Sel.getOpcode() != ISD::SELECT_CC)
2154 std::swap(Sel, Other);
2155 if (Sel.getOpcode() != ISD::SELECT_CC)
2156 return Op;
2157
2158 // The folding we want to perform is:
2159 // (xor x, (select_cc a, b, cc, 0, -1) )
2160 // -->
2161 // (csel x, (xor x, -1), cc ...)
2162 //
2163 // The latter will get matched to a CSINV instruction.
2164
2165 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2166 SDValue LHS = Sel.getOperand(0);
2167 SDValue RHS = Sel.getOperand(1);
2168 SDValue TVal = Sel.getOperand(2);
2169 SDValue FVal = Sel.getOperand(3);
2170
2171 // FIXME: This could be generalized to non-integer comparisons.
2172 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2173 return Op;
2174
2175 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2176 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2177
2178 // The values aren't constants, this isn't the pattern we're looking for.
2179 if (!CFVal || !CTVal)
2180 return Op;
2181
2182 // We can commute the SELECT_CC by inverting the condition. This
2183 // might be needed to make this fit into a CSINV pattern.
2184 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2185 std::swap(TVal, FVal);
2186 std::swap(CTVal, CFVal);
2187 CC = ISD::getSetCCInverse(CC, true);
2188 }
2189
2190 // If the constants line up, perform the transform!
2191 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2192 SDValue CCVal;
2193 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2194
2195 FVal = Other;
2196 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2197 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2198
2199 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2200 CCVal, Cmp);
2201 }
2202
2203 return Op;
2204}
2205
2206static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2207 EVT VT = Op.getValueType();
2208
2209 // Let legalize expand this if it isn't a legal type yet.
2210 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2211 return SDValue();
2212
2213 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2214
2215 unsigned Opc;
2216 bool ExtraOp = false;
2217 switch (Op.getOpcode()) {
2218 default:
2219 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2219)
;
2220 case ISD::ADDC:
2221 Opc = AArch64ISD::ADDS;
2222 break;
2223 case ISD::SUBC:
2224 Opc = AArch64ISD::SUBS;
2225 break;
2226 case ISD::ADDE:
2227 Opc = AArch64ISD::ADCS;
2228 ExtraOp = true;
2229 break;
2230 case ISD::SUBE:
2231 Opc = AArch64ISD::SBCS;
2232 ExtraOp = true;
2233 break;
2234 }
2235
2236 if (!ExtraOp)
2237 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2238 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2239 Op.getOperand(2));
2240}
2241
2242static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2243 // Let legalize expand this if it isn't a legal type yet.
2244 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2245 return SDValue();
2246
2247 SDLoc dl(Op);
2248 AArch64CC::CondCode CC;
2249 // The actual operation that sets the overflow or carry flag.
2250 SDValue Value, Overflow;
2251 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2252
2253 // We use 0 and 1 as false and true values.
2254 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2255 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2256
2257 // We use an inverted condition, because the conditional select is inverted
2258 // too. This will allow it to be selected to a single instruction:
2259 // CSINC Wd, WZR, WZR, invert(cond).
2260 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2261 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2262 CCVal, Overflow);
2263
2264 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2265 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2266}
2267
2268// Prefetch operands are:
2269// 1: Address to prefetch
2270// 2: bool isWrite
2271// 3: int locality (0 = no locality ... 3 = extreme locality)
2272// 4: bool isDataCache
2273static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2274 SDLoc DL(Op);
2275 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2276 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2277 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2278
2279 bool IsStream = !Locality;
2280 // When the locality number is set
2281 if (Locality) {
2282 // The front-end should have filtered out the out-of-range values
2283 assert(Locality <= 3 && "Prefetch locality out-of-range");
2284 // The locality degree is the opposite of the cache speed.
2285 // Put the number the other way around.
2286 // The encoding starts at 0 for level 1
2287 Locality = 3 - Locality;
2288 }
2289
2290 // Build the mask value encoding the expected behavior.
2291 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2292 (!IsData << 3) | // IsDataCache bit
2293 (Locality << 1) | // Cache level bits
2294 (unsigned)IsStream; // Stream bit
2295 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2296 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2297}
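// A concrete case of the encoding above (assuming the standard PRFM operand
// layout): __builtin_prefetch(p, /*rw=*/0, /*locality=*/3) gives IsWrite=0,
// IsData=1, IsStream=0 and Locality = 3-3 = 0, so PrfOp == 0b00000, which is
// the PLDL1KEEP hint.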
2298
2299SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2300 SelectionDAG &DAG) const {
2301 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2302
2303 RTLIB::Libcall LC;
2304 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2305
2306 return LowerF128Call(Op, DAG, LC);
2307}
2308
2309SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2310 SelectionDAG &DAG) const {
2311 if (Op.getOperand(0).getValueType() != MVT::f128) {
2312 // It's legal except when f128 is involved
2313 return Op;
2314 }
2315
2316 RTLIB::Libcall LC;
2317 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2318
2319 // FP_ROUND node has a second operand indicating whether it is known to be
2320 // precise. That doesn't take part in the LibCall so we can't directly use
2321 // LowerF128Call.
2322 SDValue SrcVal = Op.getOperand(0);
2323 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2324 SDLoc(Op)).first;
2325}
2326
2327static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2328 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2329 // Any additional optimization in this function should be recorded
2330 // in the cost tables.
2331 EVT InVT = Op.getOperand(0).getValueType();
2332 EVT VT = Op.getValueType();
2333 unsigned NumElts = InVT.getVectorNumElements();
2334
2335 // f16 vectors are promoted to f32 before a conversion.
2336 if (InVT.getVectorElementType() == MVT::f16) {
2337 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2338 SDLoc dl(Op);
2339 return DAG.getNode(
2340 Op.getOpcode(), dl, Op.getValueType(),
2341 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2342 }
2343
2344 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2345 SDLoc dl(Op);
2346 SDValue Cv =
2347 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2348 Op.getOperand(0));
2349 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2350 }
2351
2352 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2353 SDLoc dl(Op);
2354 MVT ExtVT =
2355 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2356 VT.getVectorNumElements());
2357 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2358 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2359 }
2360
2361 // Type changing conversions are illegal.
2362 return Op;
2363}
2364
2365SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2366 SelectionDAG &DAG) const {
2367 if (Op.getOperand(0).getValueType().isVector())
2368 return LowerVectorFP_TO_INT(Op, DAG);
2369
2370 // f16 conversions are promoted to f32 when full fp16 is not supported.
2371 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2372 !Subtarget->hasFullFP16()) {
2373 SDLoc dl(Op);
2374 return DAG.getNode(
2375 Op.getOpcode(), dl, Op.getValueType(),
2376 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2377 }
2378
2379 if (Op.getOperand(0).getValueType() != MVT::f128) {
2380 // It's legal except when f128 is involved
2381 return Op;
2382 }
2383
2384 RTLIB::Libcall LC;
2385 if (Op.getOpcode() == ISD::FP_TO_SINT)
2386 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2387 else
2388 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2389
2390 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2391 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2392}
2393
2394static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2395 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2396 // Any additional optimization in this function should be recorded
2397 // in the cost tables.
2398 EVT VT = Op.getValueType();
2399 SDLoc dl(Op);
2400 SDValue In = Op.getOperand(0);
2401 EVT InVT = In.getValueType();
2402
2403 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2404 MVT CastVT =
2405 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2406 InVT.getVectorNumElements());
2407 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2408 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2409 }
2410
2411 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2412 unsigned CastOpc =
2413 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2414 EVT CastVT = VT.changeVectorElementTypeToInteger();
2415 In = DAG.getNode(CastOpc, dl, CastVT, In);
2416 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2417 }
2418
2419 return Op;
2420}
2421
2422SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2423 SelectionDAG &DAG) const {
2424 if (Op.getValueType().isVector())
2425 return LowerVectorINT_TO_FP(Op, DAG);
2426
2427 // f16 conversions are promoted to f32 when full fp16 is not supported.
2428 if (Op.getValueType() == MVT::f16 &&
2429 !Subtarget->hasFullFP16()) {
2430 SDLoc dl(Op);
2431 return DAG.getNode(
2432 ISD::FP_ROUND, dl, MVT::f16,
2433 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2434 DAG.getIntPtrConstant(0, dl));
2435 }
2436
2437 // i128 conversions are libcalls.
2438 if (Op.getOperand(0).getValueType() == MVT::i128)
2439 return SDValue();
2440
2441 // Other conversions are legal, unless it's to the completely software-based
2442 // fp128.
2443 if (Op.getValueType() != MVT::f128)
2444 return Op;
2445
2446 RTLIB::Libcall LC;
2447 if (Op.getOpcode() == ISD::SINT_TO_FP)
2448 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2449 else
2450 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2451
2452 return LowerF128Call(Op, DAG, LC);
2453}
2454
2455SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2456 SelectionDAG &DAG) const {
2457 // For iOS, we want to call an alternative entry point: __sincos_stret,
2458 // which returns the values in two S / D registers.
2459 SDLoc dl(Op);
2460 SDValue Arg = Op.getOperand(0);
2461 EVT ArgVT = Arg.getValueType();
2462 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2463
2464 ArgListTy Args;
2465 ArgListEntry Entry;
2466
2467 Entry.Node = Arg;
2468 Entry.Ty = ArgTy;
2469 Entry.IsSExt = false;
2470 Entry.IsZExt = false;
2471 Args.push_back(Entry);
2472
2473 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2474 : RTLIB::SINCOS_STRET_F32;
2475 const char *LibcallName = getLibcallName(LC);
2476 SDValue Callee =
2477 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2478
2479 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2480 TargetLowering::CallLoweringInfo CLI(DAG);
2481 CLI.setDebugLoc(dl)
2482 .setChain(DAG.getEntryNode())
2483 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2484
2485 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2486 return CallResult.first;
2487}
2488
2489static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2490 if (Op.getValueType() != MVT::f16)
2491 return SDValue();
2492
2493 assert(Op.getOperand(0).getValueType() == MVT::i16);
2494 SDLoc DL(Op);
2495
2496 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2497 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2498 return SDValue(
2499 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2500 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2501 0);
2502}
2503
2504static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2505 if (OrigVT.getSizeInBits() >= 64)
2506 return OrigVT;
2507
2508 assert(OrigVT.isSimple() && "Expecting a simple value type");
2509
2510 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2511 switch (OrigSimpleTy) {
2512 default: llvm_unreachable("Unexpected Vector Type");
2513 case MVT::v2i8:
2514 case MVT::v2i16:
2515 return MVT::v2i32;
2516 case MVT::v4i8:
2517 return MVT::v4i16;
2518 }
2519}
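
// Illustrative sketch of the mapping above in plain arithmetic: keep the lane
// count and grow each lane until the whole vector reaches 64 bits. The helper
// name is ours, not an LLVM API.
#include <cassert>
static unsigned widenedEltBits(unsigned NumElts) { return 64 / NumElts; }
int main() {
  assert(widenedEltBits(2) == 32); // v2i8, v2i16 -> v2i32
  assert(widenedEltBits(4) == 16); // v4i8        -> v4i16
  return 0;
}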
2520
2521static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2522 const EVT &OrigTy,
2523 const EVT &ExtTy,
2524 unsigned ExtOpcode) {
2525 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2526 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2527 // 64-bits we need to insert a new extension so that it will be 64-bits.
2528 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2529 if (OrigTy.getSizeInBits() >= 64)
2530 return N;
2531
2532 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2533 EVT NewVT = getExtensionTo64Bits(OrigTy);
2534
2535 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2536}
2537
2538static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2539 bool isSigned) {
2540 EVT VT = N->getValueType(0);
2541
2542 if (N->getOpcode() != ISD::BUILD_VECTOR)
2543 return false;
2544
2545 for (const SDValue &Elt : N->op_values()) {
2546 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2547 unsigned EltSize = VT.getScalarSizeInBits();
2548 unsigned HalfSize = EltSize / 2;
2549 if (isSigned) {
2550 if (!isIntN(HalfSize, C->getSExtValue()))
2551 return false;
2552 } else {
2553 if (!isUIntN(HalfSize, C->getZExtValue()))
2554 return false;
2555 }
2556 continue;
2557 }
2558 return false;
2559 }
2560
2561 return true;
2562}
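
// Illustrative sketch of the "fits in half the element width" test above: for
// a v4i32 BUILD_VECTOR whose constants all fit in 16 signed bits, the vector
// is equivalent to a sign-extended v4i16 and can feed SMULL. This mirrors
// isIntN(16, C) on plain integers; it is not the LLVM helper itself.
#include <cassert>
#include <cstdint>
int main() {
  const int32_t Elts[4] = {1, 200, -3, 30000};
  for (int32_t E : Elts)
    assert(E >= INT16_MIN && E <= INT16_MAX); // all lanes fit in i16
  return 0;
}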
2563
2564static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2565 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2566 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2567 N->getOperand(0)->getValueType(0),
2568 N->getValueType(0),
2569 N->getOpcode());
2570
2571 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2572 EVT VT = N->getValueType(0);
2573 SDLoc dl(N);
2574 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2575 unsigned NumElts = VT.getVectorNumElements();
2576 MVT TruncVT = MVT::getIntegerVT(EltSize);
2577 SmallVector<SDValue, 8> Ops;
2578 for (unsigned i = 0; i != NumElts; ++i) {
2579 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2580 const APInt &CInt = C->getAPIntValue();
2581 // Element types smaller than 32 bits are not legal, so use i32 elements.
2582 // The values are implicitly truncated so sext vs. zext doesn't matter.
2583 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2584 }
2585 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2586}
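
// Illustrative sketch of why sext vs. zext of the build-vector constants does
// not matter above: the value is consumed at the narrow width again, and
// truncating either extension gives back the original bits.
#include <cassert>
#include <cstdint>
int main() {
  int8_t C = -5;                    // a half-width constant from a BUILD_VECTOR
  uint32_t Z = (uint8_t)C;          // zero-extended to 32 bits
  int32_t S = C;                    // sign-extended to 32 bits
  assert((uint8_t)Z == (uint8_t)S); // identical once implicitly truncated
  return 0;
}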
2587
2588static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2589 return N->getOpcode() == ISD::SIGN_EXTEND ||
2590 isExtendedBUILD_VECTOR(N, DAG, true);
2591}
2592
2593static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2594 return N->getOpcode() == ISD::ZERO_EXTEND ||
2595 isExtendedBUILD_VECTOR(N, DAG, false);
2596}
2597
2598static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2599 unsigned Opcode = N->getOpcode();
2600 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2601 SDNode *N0 = N->getOperand(0).getNode();
2602 SDNode *N1 = N->getOperand(1).getNode();
2603 return N0->hasOneUse() && N1->hasOneUse() &&
2604 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2605 }
2606 return false;
2607}
2608
2609static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2610 unsigned Opcode = N->getOpcode();
2611 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2612 SDNode *N0 = N->getOperand(0).getNode();
2613 SDNode *N1 = N->getOperand(1).getNode();
2614 return N0->hasOneUse() && N1->hasOneUse() &&
2615 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2616 }
2617 return false;
2618}
2619
2620SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2621 SelectionDAG &DAG) const {
2622 // The rounding mode is in bits 23:22 of the FPCR.
2623 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
2624 // The formula we use to implement this is ((FPCR + (1 << 22)) >> 22) & 3,
2625 // so that the shift + and get folded into a bitfield extract.
2626 SDLoc dl(Op);
2627
2628 SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
2629 DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
2630 MVT::i64));
2631 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2632 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2633 DAG.getConstant(1U << 22, dl, MVT::i32));
2634 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2635 DAG.getConstant(22, dl, MVT::i32));
2636 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2637 DAG.getConstant(3, dl, MVT::i32));
2638}
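
// The same add/shift/mask computation the lowering above builds as DAG nodes,
// checked against the documented RMode -> FLT_ROUNDS mapping (0->1, 1->2,
// 2->3, 3->0). A standalone sketch; it only models bits 23:22 of the FPCR.
#include <cassert>
#include <cstdint>
static unsigned fltRoundsFromFPCR(uint32_t FPCR) {
  return ((FPCR + (1u << 22)) >> 22) & 3;
}
int main() {
  assert(fltRoundsFromFPCR(0u << 22) == 1); // round to nearest
  assert(fltRoundsFromFPCR(1u << 22) == 2); // round towards +infinity
  assert(fltRoundsFromFPCR(2u << 22) == 3); // round towards -infinity
  assert(fltRoundsFromFPCR(3u << 22) == 0); // round towards zero
  return 0;
}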
2639
2640static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2641 // Multiplications are only custom-lowered for 128-bit vectors so that
2642 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2643 EVT VT = Op.getValueType();
2644 assert(VT.is128BitVector() && VT.isInteger() &&
2645 "unexpected type for custom-lowering ISD::MUL");
2646 SDNode *N0 = Op.getOperand(0).getNode();
2647 SDNode *N1 = Op.getOperand(1).getNode();
2648 unsigned NewOpc = 0;
2649 bool isMLA = false;
2650 bool isN0SExt = isSignExtended(N0, DAG);
2651 bool isN1SExt = isSignExtended(N1, DAG);
2652 if (isN0SExt && isN1SExt)
2653 NewOpc = AArch64ISD::SMULL;
2654 else {
2655 bool isN0ZExt = isZeroExtended(N0, DAG);
2656 bool isN1ZExt = isZeroExtended(N1, DAG);
2657 if (isN0ZExt && isN1ZExt)
2658 NewOpc = AArch64ISD::UMULL;
2659 else if (isN1SExt || isN1ZExt) {
2660 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2661 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2662 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2663 NewOpc = AArch64ISD::SMULL;
2664 isMLA = true;
2665 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2666 NewOpc = AArch64ISD::UMULL;
2667 isMLA = true;
2668 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2669 std::swap(N0, N1);
2670 NewOpc = AArch64ISD::UMULL;
2671 isMLA = true;
2672 }
2673 }
2674
2675 if (!NewOpc) {
2676 if (VT == MVT::v2i64)
2677 // Fall through to expand this. It is not legal.
2678 return SDValue();
2679 else
2680 // Other vector multiplications are legal.
2681 return Op;
2682 }
2683 }
2684
2685 // Legalize to a S/UMULL instruction
2686 SDLoc DL(Op);
2687 SDValue Op0;
2688 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2689 if (!isMLA) {
2690 Op0 = skipExtensionForVectorMULL(N0, DAG);
2691 assert(Op0.getValueType().is64BitVector() &&
2692 Op1.getValueType().is64BitVector() &&
2693 "unexpected types for extended operands to VMULL");
2694 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2695 }
2696 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2697 // isel lowering, to take advantage of back-to-back s/umul + s/umla with no
2698 // stall on CPUs with accumulate forwarding such as Cortex-A53/A57.
2699 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2700 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2701 EVT Op1VT = Op1.getValueType();
2702 return DAG.getNode(N0->getOpcode(), DL, VT,
2703 DAG.getNode(NewOpc, DL, VT,
2704 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2705 DAG.getNode(NewOpc, DL, VT,
2706 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2707}
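
// Illustrative check of the algebraic rewrite the isMLA path relies on,
// reduced to scalars: (zext a + zext b) * zext c == zext a * zext c +
// zext b * zext c (mod 2^64), so one wide multiply becomes two UMULLs
// feeding the original add/sub.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t A = 0xFFFFFFFFu, B = 0x12345678u, C = 0x9ABCDEF0u;
  uint64_t Fused = ((uint64_t)A + (uint64_t)B) * (uint64_t)C; // wide (A + B) * C
  uint64_t Split = (uint64_t)A * C + (uint64_t)B * C;         // UMULL + UMULL + ADD
  assert(Fused == Split);
  return 0;
}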
2708
2709SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2710 SelectionDAG &DAG) const {
2711 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2712 SDLoc dl(Op);
2713 switch (IntNo) {
2714 default: return SDValue(); // Don't custom lower most intrinsics.
2715 case Intrinsic::thread_pointer: {
2716 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2717 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2718 }
2719 case Intrinsic::aarch64_neon_abs:
2720 return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
2721 Op.getOperand(1));
2722 case Intrinsic::aarch64_neon_smax:
2723 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2724 Op.getOperand(1), Op.getOperand(2));
2725 case Intrinsic::aarch64_neon_umax:
2726 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2727 Op.getOperand(1), Op.getOperand(2));
2728 case Intrinsic::aarch64_neon_smin:
2729 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2730 Op.getOperand(1), Op.getOperand(2));
2731 case Intrinsic::aarch64_neon_umin:
2732 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2733 Op.getOperand(1), Op.getOperand(2));
2734 }
2735}
2736
2737// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
2738static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
2739 EVT VT, EVT MemVT,
2740 SelectionDAG &DAG) {
2741 assert(VT.isVector() && "VT should be a vector type");
2742 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
2743
2744 SDValue Value = ST->getValue();
2745
2746 // It first extends the promoted v4i16 to v8i16, truncates it to v8i8, and
2747 // extracts the word lane that represents the v4i8 subvector. It optimizes
2748 // the store to:
2749 //
2750 // xtn v0.8b, v0.8h
2751 // str s0, [x0]
2752
2753 SDValue Undef = DAG.getUNDEF(MVT::i16);
2754 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
2755 {Undef, Undef, Undef, Undef});
2756
2757 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
2758 Value, UndefVec);
2759 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
2760
2761 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
2762 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
2763 Trunc, DAG.getConstant(0, DL, MVT::i64));
2764
2765 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
2766 ST->getBasePtr(), ST->getMemOperand());
2767}
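
// Illustrative intrinsics version of the xtn + "str s0" sequence the lowering
// above aims for. It assumes an AArch64 target with <arm_neon.h>; the DAG uses
// an undef upper half where this sketch uses zeros.
#include <arm_neon.h>
#include <cstdint>
#include <cstring>
static void storeV4i8(uint8_t *Dst, uint16x4_t V) {
  uint16x8_t Widened = vcombine_u16(V, vdup_n_u16(0)); // CONCAT_VECTORS to v8i16
  uint8x8_t Narrowed = vmovn_u16(Widened);             // xtn v0.8b, v0.8h
  uint32_t Lane0 = vget_lane_u32(vreinterpret_u32_u8(Narrowed), 0);
  std::memcpy(Dst, &Lane0, 4);                         // the 32-bit "str s0" store
}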
2768
2769 // Custom lowering for any store, vector or scalar, default or with a
2770 // truncating operation. Currently we only custom lower the truncating store
2771 // from vector v4i16 to v4i8.
2772SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
2773 SelectionDAG &DAG) const {
2774 SDLoc Dl(Op);
2775 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
2776 assert (StoreNode && "Can only custom lower store nodes");
2777
2778 SDValue Value = StoreNode->getValue();
2779
2780 EVT VT = Value.getValueType();
2781 EVT MemVT = StoreNode->getMemoryVT();
2782
2783 assert (VT.isVector() && "Can only custom lower vector store types");
2784
2785 unsigned AS = StoreNode->getAddressSpace();
2786 unsigned Align = StoreNode->getAlignment();
2787 if (Align < MemVT.getStoreSize() &&
2788 !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
2789 return scalarizeVectorStore(StoreNode, DAG);
2790 }
2791
2792 if (StoreNode->isTruncatingStore()) {
2793 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
2794 }
2795
2796 return SDValue();
2797}
2798
2799SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2800 SelectionDAG &DAG) const {
2801 LLVM_DEBUG(dbgs() << "Custom lowering: ");
2802 LLVM_DEBUG(Op.dump());
2803
2804 switch (Op.getOpcode()) {
2805 default:
2806 llvm_unreachable("unimplemented operand")::llvm::llvm_unreachable_internal("unimplemented operand", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2806)
;
2807 return SDValue();
2808 case ISD::BITCAST:
2809 return LowerBITCAST(Op, DAG);
2810 case ISD::GlobalAddress:
2811 return LowerGlobalAddress(Op, DAG);
2812 case ISD::GlobalTLSAddress:
2813 return LowerGlobalTLSAddress(Op, DAG);
2814 case ISD::SETCC:
2815 return LowerSETCC(Op, DAG);
2816 case ISD::BR_CC:
2817 return LowerBR_CC(Op, DAG);
2818 case ISD::SELECT:
2819 return LowerSELECT(Op, DAG);
2820 case ISD::SELECT_CC:
2821 return LowerSELECT_CC(Op, DAG);
2822 case ISD::JumpTable:
2823 return LowerJumpTable(Op, DAG);
2824 case ISD::BR_JT:
2825 return LowerBR_JT(Op, DAG);
2826 case ISD::ConstantPool:
2827 return LowerConstantPool(Op, DAG);
2828 case ISD::BlockAddress:
2829 return LowerBlockAddress(Op, DAG);
2830 case ISD::VASTART:
2831 return LowerVASTART(Op, DAG);
2832 case ISD::VACOPY:
2833 return LowerVACOPY(Op, DAG);
2834 case ISD::VAARG:
2835 return LowerVAARG(Op, DAG);
2836 case ISD::ADDC:
2837 case ISD::ADDE:
2838 case ISD::SUBC:
2839 case ISD::SUBE:
2840 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2841 case ISD::SADDO:
2842 case ISD::UADDO:
2843 case ISD::SSUBO:
2844 case ISD::USUBO:
2845 case ISD::SMULO:
2846 case ISD::UMULO:
2847 return LowerXALUO(Op, DAG);
2848 case ISD::FADD:
2849 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2850 case ISD::FSUB:
2851 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2852 case ISD::FMUL:
2853 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2854 case ISD::FDIV:
2855 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2856 case ISD::FP_ROUND:
2857 return LowerFP_ROUND(Op, DAG);
2858 case ISD::FP_EXTEND:
2859 return LowerFP_EXTEND(Op, DAG);
2860 case ISD::FRAMEADDR:
2861 return LowerFRAMEADDR(Op, DAG);
2862 case ISD::SPONENTRY:
2863 return LowerSPONENTRY(Op, DAG);
2864 case ISD::RETURNADDR:
2865 return LowerRETURNADDR(Op, DAG);
2866 case ISD::ADDROFRETURNADDR:
2867 return LowerADDROFRETURNADDR(Op, DAG);
2868 case ISD::INSERT_VECTOR_ELT:
2869 return LowerINSERT_VECTOR_ELT(Op, DAG);
2870 case ISD::EXTRACT_VECTOR_ELT:
2871 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2872 case ISD::BUILD_VECTOR:
2873 return LowerBUILD_VECTOR(Op, DAG);
2874 case ISD::VECTOR_SHUFFLE:
2875 return LowerVECTOR_SHUFFLE(Op, DAG);
2876 case ISD::EXTRACT_SUBVECTOR:
2877 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2878 case ISD::SRA:
2879 case ISD::SRL:
2880 case ISD::SHL:
2881 return LowerVectorSRA_SRL_SHL(Op, DAG);
2882 case ISD::SHL_PARTS:
2883 return LowerShiftLeftParts(Op, DAG);
2884 case ISD::SRL_PARTS:
2885 case ISD::SRA_PARTS:
2886 return LowerShiftRightParts(Op, DAG);
2887 case ISD::CTPOP:
2888 return LowerCTPOP(Op, DAG);
2889 case ISD::FCOPYSIGN:
2890 return LowerFCOPYSIGN(Op, DAG);
2891 case ISD::AND:
2892 return LowerVectorAND(Op, DAG);
2893 case ISD::OR:
2894 return LowerVectorOR(Op, DAG);
2895 case ISD::XOR:
2896 return LowerXOR(Op, DAG);
2897 case ISD::PREFETCH:
2898 return LowerPREFETCH(Op, DAG);
2899 case ISD::SINT_TO_FP:
2900 case ISD::UINT_TO_FP:
2901 return LowerINT_TO_FP(Op, DAG);
2902 case ISD::FP_TO_SINT:
2903 case ISD::FP_TO_UINT:
2904 return LowerFP_TO_INT(Op, DAG);
2905 case ISD::FSINCOS:
2906 return LowerFSINCOS(Op, DAG);
2907 case ISD::FLT_ROUNDS_:
2908 return LowerFLT_ROUNDS_(Op, DAG);
2909 case ISD::MUL:
2910 return LowerMUL(Op, DAG);
2911 case ISD::INTRINSIC_WO_CHAIN:
2912 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2913 case ISD::STORE:
2914 return LowerSTORE(Op, DAG);
2915 case ISD::VECREDUCE_ADD:
2916 case ISD::VECREDUCE_SMAX:
2917 case ISD::VECREDUCE_SMIN:
2918 case ISD::VECREDUCE_UMAX:
2919 case ISD::VECREDUCE_UMIN:
2920 case ISD::VECREDUCE_FMAX:
2921 case ISD::VECREDUCE_FMIN:
2922 return LowerVECREDUCE(Op, DAG);
2923 case ISD::ATOMIC_LOAD_SUB:
2924 return LowerATOMIC_LOAD_SUB(Op, DAG);
2925 case ISD::ATOMIC_LOAD_AND:
2926 return LowerATOMIC_LOAD_AND(Op, DAG);
2927 case ISD::DYNAMIC_STACKALLOC:
2928 return LowerDYNAMIC_STACKALLOC(Op, DAG);
2929 }
2930}
2931
2932//===----------------------------------------------------------------------===//
2933// Calling Convention Implementation
2934//===----------------------------------------------------------------------===//
2935
2936#include "AArch64GenCallingConv.inc"
2937
2938/// Selects the correct CCAssignFn for a given CallingConvention value.
2939CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2940 bool IsVarArg) const {
2941 switch (CC) {
2942 default:
2943 report_fatal_error("Unsupported calling convention.");
2944 case CallingConv::WebKit_JS:
2945 return CC_AArch64_WebKit_JS;
2946 case CallingConv::GHC:
2947 return CC_AArch64_GHC;
2948 case CallingConv::C:
2949 case CallingConv::Fast:
2950 case CallingConv::PreserveMost:
2951 case CallingConv::CXX_FAST_TLS:
2952 case CallingConv::Swift:
2953 if (Subtarget->isTargetWindows() && IsVarArg)
2954 return CC_AArch64_Win64_VarArg;
2955 if (!Subtarget->isTargetDarwin())
2956 return CC_AArch64_AAPCS;
2957 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2958 case CallingConv::Win64:
2959 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
2960 case CallingConv::AArch64_VectorCall:
2961 return CC_AArch64_AAPCS;
2962 }
2963}
2964
2965CCAssignFn *
2966AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2967 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2968 : RetCC_AArch64_AAPCS;
2969}
2970
2971SDValue AArch64TargetLowering::LowerFormalArguments(
2972 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2973 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2974 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2975 MachineFunction &MF = DAG.getMachineFunction();
2976 MachineFrameInfo &MFI = MF.getFrameInfo();
2977 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
2978
2979 // Assign locations to all of the incoming arguments.
2980 SmallVector<CCValAssign, 16> ArgLocs;
2981 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2982 *DAG.getContext());
2983
2984 // At this point, Ins[].VT may already be promoted to i32. To correctly
2985 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2986 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2987 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2988 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2989 // LocVT.
2990 unsigned NumArgs = Ins.size();
2991 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
2992 unsigned CurArgIdx = 0;
2993 for (unsigned i = 0; i != NumArgs; ++i) {
2994 MVT ValVT = Ins[i].VT;
2995 if (Ins[i].isOrigArg()) {
2996 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2997 CurArgIdx = Ins[i].getOrigArgIndex();
2998
2999 // Get type of the original argument.
3000 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3001 /*AllowUnknown*/ true);
3002 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3003 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3004 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3005 ValVT = MVT::i8;
3006 else if (ActualMVT == MVT::i16)
3007 ValVT = MVT::i16;
3008 }
3009 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3010 bool Res =
3011 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3012 assert(!Res && "Call operand has unhandled type");
3013 (void)Res;
3014 }
3015 assert(ArgLocs.size() == Ins.size());
3016 SmallVector<SDValue, 16> ArgValues;
3017 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3018 CCValAssign &VA = ArgLocs[i];
3019
3020 if (Ins[i].Flags.isByVal()) {
3021 // Byval is used for HFAs in the PCS, but the system should work in a
3022 // non-compliant manner for larger structs.
3023 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3024 int Size = Ins[i].Flags.getByValSize();
3025 unsigned NumRegs = (Size + 7) / 8;
3026
3027 // FIXME: This works on big-endian for composite byvals, which are the common
3028 // case. It should also work for fundamental types.
3029 unsigned FrameIdx =
3030 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3031 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3032 InVals.push_back(FrameIdxN);
3033
3034 continue;
3035 }
3036
3037 if (VA.isRegLoc()) {
3038 // Arguments stored in registers.
3039 EVT RegVT = VA.getLocVT();
3040
3041 SDValue ArgValue;
3042 const TargetRegisterClass *RC;
3043
3044 if (RegVT == MVT::i32)
3045 RC = &AArch64::GPR32RegClass;
3046 else if (RegVT == MVT::i64)
3047 RC = &AArch64::GPR64RegClass;
3048 else if (RegVT == MVT::f16)
3049 RC = &AArch64::FPR16RegClass;
3050 else if (RegVT == MVT::f32)
3051 RC = &AArch64::FPR32RegClass;
3052 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3053 RC = &AArch64::FPR64RegClass;
3054 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3055 RC = &AArch64::FPR128RegClass;
3056 else
3057 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering"
, "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3057)
;
3058
3059 // Transform the arguments in physical registers into virtual ones.
3060 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3061 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3062
3063 // If this is an 8, 16 or 32-bit value, it is really passed promoted
3064 // to 64 bits. Insert an assert[sz]ext to capture this, then
3065 // truncate to the right size.
3066 switch (VA.getLocInfo()) {
3067 default:
3068 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3068)
;
3069 case CCValAssign::Full:
3070 break;
3071 case CCValAssign::BCvt:
3072 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3073 break;
3074 case CCValAssign::AExt:
3075 case CCValAssign::SExt:
3076 case CCValAssign::ZExt:
3077 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
3078 // nodes after our lowering.
3079 assert(RegVT == Ins[i].VT && "incorrect register location selected");
3080 break;
3081 }
3082
3083 InVals.push_back(ArgValue);
3084
3085 } else { // VA.isRegLoc()
3086 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3087 unsigned ArgOffset = VA.getLocMemOffset();
3088 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
3089
3090 uint32_t BEAlign = 0;
3091 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3092 !Ins[i].Flags.isInConsecutiveRegs())
3093 BEAlign = 8 - ArgSize;
3094
3095 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3096
3097 // Create load nodes to retrieve arguments from the stack.
3098 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3099 SDValue ArgValue;
3100
3101 // For NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
3102 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3103 MVT MemVT = VA.getValVT();
3104
3105 switch (VA.getLocInfo()) {
3106 default:
3107 break;
3108 case CCValAssign::BCvt:
3109 MemVT = VA.getLocVT();
3110 break;
3111 case CCValAssign::SExt:
3112 ExtType = ISD::SEXTLOAD;
3113 break;
3114 case CCValAssign::ZExt:
3115 ExtType = ISD::ZEXTLOAD;
3116 break;
3117 case CCValAssign::AExt:
3118 ExtType = ISD::EXTLOAD;
3119 break;
3120 }
3121
3122 ArgValue = DAG.getExtLoad(
3123 ExtType, DL, VA.getLocVT(), Chain, FIN,
3124 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3125 MemVT);
3126
3127 InVals.push_back(ArgValue);
3128 }
3129 }
3130
3131 // varargs
3132 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3133 if (isVarArg) {
3134 if (!Subtarget->isTargetDarwin() || IsWin64) {
3135 // The AAPCS variadic function ABI is identical to the non-variadic
3136 // one. As a result there may be more arguments in registers and we should
3137 // save them for future reference.
3138 // Win64 variadic functions also pass arguments in registers, but all float
3139 // arguments are passed in integer registers.
3140 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3141 }
3142
3143 // This will point to the next argument passed via stack.
3144 unsigned StackOffset = CCInfo.getNextStackOffset();
3145 // We currently pass all varargs at 8-byte alignment.
3146 StackOffset = ((StackOffset + 7) & ~7);
3147 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3148
3149 if (MFI.hasMustTailInVarArgFunc()) {
3150 SmallVector<MVT, 2> RegParmTypes;
3151 RegParmTypes.push_back(MVT::i64);
3152 RegParmTypes.push_back(MVT::f128);
3153 // Compute the set of forwarded registers. The rest are scratch.
3154 SmallVectorImpl<ForwardedRegister> &Forwards =
3155 FuncInfo->getForwardedMustTailRegParms();
3156 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
3157 CC_AArch64_AAPCS);
3158 }
3159 }
3160
3161 unsigned StackArgSize = CCInfo.getNextStackOffset();
3162 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3163 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3164 // This is a non-standard ABI so by fiat I say we're allowed to make full
3165 // use of the stack area to be popped, which must be aligned to 16 bytes in
3166 // any case:
3167 StackArgSize = alignTo(StackArgSize, 16);
3168
3169 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3170 // a multiple of 16.
3171 FuncInfo->setArgumentStackToRestore(StackArgSize);
3172
3173 // This realignment carries over to the available bytes below. Our own
3174 // callers will guarantee the space is free by giving an aligned value to
3175 // CALLSEQ_START.
3176 }
3177 // Even if we're not expected to free up the space, it's useful to know how
3178 // much is there while considering tail calls (because we can reuse it).
3179 FuncInfo->setBytesInStackArgArea(StackArgSize);
3180
3181 if (Subtarget->hasCustomCallingConv())
3182 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3183
3184 return Chain;
3185}
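
// Illustrative sketch of the byval sizing used above: the aggregate is rounded
// up to whole 8-byte slots, (Size + 7) / 8 registers, and the fixed object is
// 8 * NumRegs bytes.
#include <cassert>
int main() {
  auto NumRegs = [](int Size) { return (Size + 7) / 8; };
  assert(NumRegs(1) == 1);
  assert(NumRegs(8) == 1);
  assert(NumRegs(9) == 2);
  assert(NumRegs(24) == 3);
  return 0;
}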
3186
3187void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3188 SelectionDAG &DAG,
3189 const SDLoc &DL,
3190 SDValue &Chain) const {
3191 MachineFunction &MF = DAG.getMachineFunction();
3192 MachineFrameInfo &MFI = MF.getFrameInfo();
3193 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3194 auto PtrVT = getPointerTy(DAG.getDataLayout());
3195 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3196
3197 SmallVector<SDValue, 8> MemOps;
3198
3199 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3200 AArch64::X3, AArch64::X4, AArch64::X5,
3201 AArch64::X6, AArch64::X7 };
3202 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3203 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3204
3205 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3206 int GPRIdx = 0;
3207 if (GPRSaveSize != 0) {
3208 if (IsWin64) {
3209 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3210 if (GPRSaveSize & 15)
3211 // The extra size here, if triggered, will always be 8.
3212 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3213 } else
3214 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3215
3216 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3217
3218 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3219 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3220 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3221 SDValue Store = DAG.getStore(
3222 Val.getValue(1), DL, Val, FIN,
3223 IsWin64
3224 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3225 GPRIdx,
3226 (i - FirstVariadicGPR) * 8)
3227 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3228 MemOps.push_back(Store);
3229 FIN =
3230 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3231 }
3232 }
3233 FuncInfo->setVarArgsGPRIndex(GPRIdx);
3234 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3235
3236 if (Subtarget->hasFPARMv8() && !IsWin64) {
3237 static const MCPhysReg FPRArgRegs[] = {
3238 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3239 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3240 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3241 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3242
3243 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3244 int FPRIdx = 0;
3245 if (FPRSaveSize != 0) {
3246 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3247
3248 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3249
3250 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3251 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3252 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3253
3254 SDValue Store = DAG.getStore(
3255 Val.getValue(1), DL, Val, FIN,
3256 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3257 MemOps.push_back(Store);
3258 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3259 DAG.getConstant(16, DL, PtrVT));
3260 }
3261 }
3262 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3263 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3264 }
3265
3266 if (!MemOps.empty()) {
3267 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3268 }
3269}
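
// Illustrative sketch of the varargs GPR save-area sizing above: 8 bytes for
// every X register not consumed by fixed arguments, and on Win64 the area is
// padded to 16 bytes, so the extra size, when it triggers, is always 8.
#include <cassert>
int main() {
  const unsigned NumGPRArgRegs = 8; // X0..X7
  auto SaveSize = [&](unsigned FirstVariadicGPR) {
    return 8 * (NumGPRArgRegs - FirstVariadicGPR);
  };
  assert(SaveSize(0) == 64);        // nothing allocated yet: save X0-X7
  assert(SaveSize(6) == 16);        // only X6, X7 left to save
  assert((SaveSize(3) & 15) == 8);  // odd register count -> 8 bytes of padding
  return 0;
}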
3270
3271/// LowerCallResult - Lower the result values of a call into the
3272/// appropriate copies out of appropriate physical registers.
3273SDValue AArch64TargetLowering::LowerCallResult(
3274 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3275 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3276 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3277 SDValue ThisVal) const {
3278 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3279 ? RetCC_AArch64_WebKit_JS
3280 : RetCC_AArch64_AAPCS;
3281 // Assign locations to each value returned by this call.
3282 SmallVector<CCValAssign, 16> RVLocs;
3283 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3284 *DAG.getContext());
3285 CCInfo.AnalyzeCallResult(Ins, RetCC);
3286
3287 // Copy all of the result registers out of their specified physreg.
3288 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3289 CCValAssign VA = RVLocs[i];
3290
3291 // Pass 'this' value directly from the argument to return value, to avoid
3292 // reg unit interference
3293 if (i == 0 && isThisReturn) {
3294 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3295 "unexpected return calling convention register assignment");
3296 InVals.push_back(ThisVal);
3297 continue;
3298 }
3299
3300 SDValue Val =
3301 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3302 Chain = Val.getValue(1);
3303 InFlag = Val.getValue(2);
3304
3305 switch (VA.getLocInfo()) {
3306 default:
3307 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3307)
;
3308 case CCValAssign::Full:
3309 break;
3310 case CCValAssign::BCvt:
3311 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3312 break;
3313 }
3314
3315 InVals.push_back(Val);
3316 }
3317
3318 return Chain;
3319}
3320
3321/// Return true if the calling convention is one that we can guarantee TCO for.
3322static bool canGuaranteeTCO(CallingConv::ID CC) {
3323 return CC == CallingConv::Fast;
3324}
3325
3326/// Return true if we might ever do TCO for calls with this calling convention.
3327static bool mayTailCallThisCC(CallingConv::ID CC) {
3328 switch (CC) {
3329 case CallingConv::C:
3330 case CallingConv::PreserveMost:
3331 case CallingConv::Swift:
3332 return true;
3333 default:
3334 return canGuaranteeTCO(CC);
3335 }
3336}
3337
3338bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3339 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3340 const SmallVectorImpl<ISD::OutputArg> &Outs,
3341 const SmallVectorImpl<SDValue> &OutVals,
3342 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3343 if (!mayTailCallThisCC(CalleeCC))
3344 return false;
3345
3346 MachineFunction &MF = DAG.getMachineFunction();
3347 const Function &CallerF = MF.getFunction();
3348 CallingConv::ID CallerCC = CallerF.getCallingConv();
3349 bool CCMatch = CallerCC == CalleeCC;
3350
3351 // Byval parameters hand the function a pointer directly into the stack area
3352 // we want to reuse during a tail call. Working around this *is* possible (see
3353 // X86) but less efficient and uglier in LowerCall.
3354 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3355 e = CallerF.arg_end();
3356 i != e; ++i)
3357 if (i->hasByValAttr())
3358 return false;
3359
3360 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3361 return canGuaranteeTCO(CalleeCC) && CCMatch;
3362
3363 // Externally-defined functions with weak linkage should not be
3364 // tail-called on AArch64 when the OS does not support dynamic
3365 // pre-emption of symbols, as the AAELF spec requires normal calls
3366 // to undefined weak functions to be replaced with a NOP or jump to the
3367 // next instruction. The behaviour of branch instructions in this
3368 // situation (as used for tail calls) is implementation-defined, so we
3369 // cannot rely on the linker replacing the tail call with a return.
3370 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3371 const GlobalValue *GV = G->getGlobal();
3372 const Triple &TT = getTargetMachine().getTargetTriple();
3373 if (GV->hasExternalWeakLinkage() &&
3374 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3375 return false;
3376 }
3377
3378 // Now we search for cases where we can use a tail call without changing the
3379 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3380 // concept.
3381
3382 // I want anyone implementing a new calling convention to think long and hard
3383 // about this assert.
3384 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3385 "Unexpected variadic calling convention");
3386
3387 LLVMContext &C = *DAG.getContext();
3388 if (isVarArg && !Outs.empty()) {
3389 // At least two cases here: if caller is fastcc then we can't have any
3390 // memory arguments (we'd be expected to clean up the stack afterwards). If
3391 // caller is C then we could potentially use its argument area.
3392
3393 // FIXME: for now we take the most conservative of these in both cases:
3394 // disallow all variadic memory operands.
3395 SmallVector<CCValAssign, 16> ArgLocs;
3396 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3397
3398 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3399 for (const CCValAssign &ArgLoc : ArgLocs)
3400 if (!ArgLoc.isRegLoc())
3401 return false;
3402 }
3403
3404 // Check that the call results are passed in the same way.
3405 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3406 CCAssignFnForCall(CalleeCC, isVarArg),
3407 CCAssignFnForCall(CallerCC, isVarArg)))
3408 return false;
3409 // The callee has to preserve all registers the caller needs to preserve.
3410 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3411 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3412 if (!CCMatch) {
3413 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3414 if (Subtarget->hasCustomCallingConv()) {
3415 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3416 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3417 }
3418 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3419 return false;
3420 }
3421
3422 // Nothing more to check if the callee is taking no arguments
3423 if (Outs.empty())
3424 return true;
3425
3426 SmallVector<CCValAssign, 16> ArgLocs;
3427 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3428
3429 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3430
3431 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3432
3433 // If the stack arguments for this call do not fit into our own save area then
3434 // the call cannot be made tail.
3435 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3436 return false;
3437
3438 const MachineRegisterInfo &MRI = MF.getRegInfo();
3439 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3440 return false;
3441
3442 return true;
3443}
3444
3445SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3446 SelectionDAG &DAG,
3447 MachineFrameInfo &MFI,
3448 int ClobberedFI) const {
3449 SmallVector<SDValue, 8> ArgChains;
3450 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3451 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3452
3453 // Include the original chain at the beginning of the list. When this is
3454 // used by target LowerCall hooks, this helps legalize find the
3455 // CALLSEQ_BEGIN node.
3456 ArgChains.push_back(Chain);
3457
3458 // Add a chain value for each stack argument load that overlaps the clobbered object.
3459 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3460 UE = DAG.getEntryNode().getNode()->use_end();
3461 U != UE; ++U)
3462 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3463 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3464 if (FI->getIndex() < 0) {
3465 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3466 int64_t InLastByte = InFirstByte;
3467 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3468
3469 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3470 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3471 ArgChains.push_back(SDValue(L, 1));
3472 }
3473
3474 // Build a tokenfactor for all the chains.
3475 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3476}
3477
3478bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3479 bool TailCallOpt) const {
3480 return CallCC == CallingConv::Fast && TailCallOpt;
3481}
3482
3483/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3484/// and add input and output parameter nodes.
3485SDValue
3486AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3487 SmallVectorImpl<SDValue> &InVals) const {
3488 SelectionDAG &DAG = CLI.DAG;
3489 SDLoc &DL = CLI.DL;
3490 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3491 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3492 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3493 SDValue Chain = CLI.Chain;
3494 SDValue Callee = CLI.Callee;
3495 bool &IsTailCall = CLI.IsTailCall;
3496 CallingConv::ID CallConv = CLI.CallConv;
3497 bool IsVarArg = CLI.IsVarArg;
3498
3499 MachineFunction &MF = DAG.getMachineFunction();
3500 bool IsThisReturn = false;
3501
3502 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3503 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3504 bool IsSibCall = false;
3505
3506 if (IsTailCall) {
3507 // Check if it's really possible to do a tail call.
3508 IsTailCall = isEligibleForTailCallOptimization(
3509 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3510 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3511 report_fatal_error("failed to perform tail call elimination on a call "
3512 "site marked musttail");
3513
3514 // A sibling call is one where we're under the usual C ABI and not planning
3515 // to change that but can still do a tail call:
3516 if (!TailCallOpt && IsTailCall)
3517 IsSibCall = true;
3518
3519 if (IsTailCall)
3520 ++NumTailCalls;
3521 }
3522
3523 // Analyze operands of the call, assigning locations to each operand.
3524 SmallVector<CCValAssign, 16> ArgLocs;
3525 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3526 *DAG.getContext());
3527
3528 if (IsVarArg) {
3529 // Handle fixed and variable vector arguments differently.
3530 // Variable vector arguments always go into memory.
3531 unsigned NumArgs = Outs.size();
3532
3533 for (unsigned i = 0; i != NumArgs; ++i) {
3534 MVT ArgVT = Outs[i].VT;
3535 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3536 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3537 /*IsVarArg=*/ !Outs[i].IsFixed);
3538 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3539 assert(!Res && "Call operand has unhandled type");
3540 (void)Res;
3541 }
3542 } else {
3543 // At this point, Outs[].VT may already be promoted to i32. To correctly
3544 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3545 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3546 // Since AnalyzeCallOperands uses Outs[].VT for both ValVT and LocVT, here
3547 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3548 // LocVT.
3549 unsigned NumArgs = Outs.size();
3550 for (unsigned i = 0; i != NumArgs; ++i) {
3551 MVT ValVT = Outs[i].VT;
3552 // Get type of the original argument.
3553 EVT ActualVT = getValueType(DAG.getDataLayout(),
3554 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3555 /*AllowUnknown*/ true);
3556 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3557 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3558 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3559 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3560 ValVT = MVT::i8;
3561 else if (ActualMVT == MVT::i16)
3562 ValVT = MVT::i16;
3563
3564 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3565 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3566 assert(!Res && "Call operand has unhandled type");
3567 (void)Res;
3568 }
3569 }
3570
3571 // Get a count of how many bytes are to be pushed on the stack.
3572 unsigned NumBytes = CCInfo.getNextStackOffset();
3573
3574 if (IsSibCall) {
3575 // Since we're not changing the ABI to make this a tail call, the memory
3576 // operands are already available in the caller's incoming argument space.
3577 NumBytes = 0;
3578 }
3579
3580 // FPDiff is the byte offset of the call's argument area from the callee's.
3581 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3582 // by this amount for a tail call. In a sibling call it must be 0 because the
3583 // caller will deallocate the entire stack and the callee still expects its
3584 // arguments to begin at SP+0. Completely unused for non-tail calls.
3585 int FPDiff = 0;
3586
3587 if (IsTailCall && !IsSibCall) {
3588 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3589
3590 // Since the callee will pop the argument stack as a tail call, we must keep
3591 // the popped size 16-byte aligned.
3592 NumBytes = alignTo(NumBytes, 16);
3593
3594 // FPDiff will be negative if this tail call requires more space than we
3595 // would automatically have in our incoming argument space. Positive if we
3596 // can actually shrink the stack.
3597 FPDiff = NumReusableBytes - NumBytes;
3598
3599 // The stack pointer must be 16-byte aligned at all times it's used for a
3600 // memory operation, which in practice means at *all* times and in
3601 // particular across call boundaries. Therefore our own arguments started at
3602 // a 16-byte aligned SP and the delta applied for the tail call should
3603 // satisfy the same constraint.
3604 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3605 }
3606
3607 // Adjust the stack pointer for the new arguments...
3608 // These operations are automatically eliminated by the prolog/epilog pass
3609 if (!IsSibCall)
3610 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3611
3612 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3613 getPointerTy(DAG.getDataLayout()));
3614
3615 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3616 SmallVector<SDValue, 8> MemOpChains;
3617 auto PtrVT = getPointerTy(DAG.getDataLayout());
3618
3619 if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
3620 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
3621 for (const auto &F : Forwards) {
3622 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
3623 RegsToPass.push_back(std::make_pair(unsigned(F.PReg), Val));
3624 }
3625 }
3626
3627 // Walk the register/memloc assignments, inserting copies/loads.
3628 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3629 ++i, ++realArgIdx) {
3630 CCValAssign &VA = ArgLocs[i];
3631 SDValue Arg = OutVals[realArgIdx];
3632 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3633
3634 // Promote the value if needed.
3635 switch (VA.getLocInfo()) {
3636 default:
3637 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-8~svn350071/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3637)
;
3638 case CCValAssign::Full:
3639 break;
3640 case CCValAssign::SExt:
3641 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3642 break;
3643 case CCValAssign::ZExt:
3644 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3645 break;
3646 case CCValAssign::AExt:
3647 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3648 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3649 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3650 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3651 }
3652 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3653 break;
3654 case CCValAssign::BCvt:
3655 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3656 break;
3657 case CCValAssign::FPExt:
3658 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3659 break;
3660 }
3661
3662 if (VA.isRegLoc()) {
3663 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3664 Outs[0].VT == MVT::i64) {
3665 assert(VA.getLocVT() == MVT::i64 &&
3666 "unexpected calling convention register assignment");
3667 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3668 "unexpected use of 'returned'");
3669 IsThisReturn = true;
3670 }
3671 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3672 } else {
3673 assert(VA.isMemLoc());
3674
3675 SDValue DstAddr;
3676 MachinePointerInfo DstInfo;
3677
3678 // FIXME: This works on big-endian for composite byvals, which are the
3679 // common case. It should also work for fundamental types.
3680 uint32_t BEAlign = 0;
3681 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3682 : VA.getValVT().getSizeInBits();
3683 OpSize = (OpSize + 7) / 8;
3684 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3685 !Flags.isInConsecutiveRegs()) {
3686 if (OpSize < 8)
3687 BEAlign = 8 - OpSize;
3688 }
3689 unsigned LocMemOffset = VA.getLocMemOffset();
3690 int32_t Offset = LocMemOffset + BEAlign;
3691 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3692 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3693
3694 if (IsTailCall) {
3695 Offset = Offset + FPDiff;
3696 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3697
3698 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3699 DstInfo =
3700 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3701
3702 // Make sure any stack arguments overlapping with where we're storing
3703 // are loaded before this eventual operation. Otherwise they'll be
3704 // clobbered.
3705 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3706 } else {
3707 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3708
3709 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3710 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3711 LocMemOffset);
3712 }
3713
3714 if (Outs[i].Flags.isByVal()) {
3715 SDValue SizeNode =
3716 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3717 SDValue Cpy = DAG.getMemcpy(
3718 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3719 /*isVol = */ false, /*AlwaysInline = */ false,
3720 /*isTailCall = */ false,
3721 DstInfo, MachinePointerInfo());
3722
3723 MemOpChains.push_back(Cpy);
3724 } else {
3725 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3726 // promoted to a legal register type i32, we should truncate Arg back to
3727 // i1/i8/i16.
3728 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3729 VA.getValVT() == MVT::i16)
3730 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3731
3732 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3733 MemOpChains.push_back(Store);
3734 }
3735 }
3736 }
3737
3738 if (!MemOpChains.empty())
3739 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3740
3741 // Build a sequence of copy-to-reg nodes chained together with token chain
3742 // and flag operands which copy the outgoing args into the appropriate regs.
3743 SDValue InFlag;
3744 for (auto &RegToPass : RegsToPass) {
3745 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3746 RegToPass.second, InFlag);
3747 InFlag = Chain.getValue(1);
3748 }
3749
3750 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3751 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3752 // node so that legalize doesn't hack it.
3753 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3754 auto GV = G->getGlobal();
3755 if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3756 AArch64II::MO_GOT) {
3757 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3758 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3759 } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3760 assert(Subtarget->isTargetWindows() &&
3761 "Windows is the only supported COFF target");
3762 Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3763 } else {
3764 const GlobalValue *GV = G->getGlobal();
3765 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3766 }
3767 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3768 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3769 Subtarget->isTargetMachO()) {
3770 const char *Sym = S->getSymbol();
3771 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3772 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3773 } else {
3774 const char *Sym = S->getSymbol();
3775 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3776 }
3777 }
3778
3779 // We don't usually want to end the call-sequence here because we would tidy
3780 // the frame up *after* the call, however in the ABI-changing tail-call case
3781 // we've carefully laid out the parameters so that when sp is reset they'll be
3782 // in the correct location.
3783 if (IsTailCall && !IsSibCall) {
3784 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3785 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3786 InFlag = Chain.getValue(1);
3787 }
3788
3789 std::vector<SDValue> Ops;
3790 Ops.push_back(Chain);
3791 Ops.push_back(Callee);
3792
3793 if (IsTailCall) {
3794 // Each tail call may have to adjust the stack by a different amount, so
3795 // this information must travel along with the operation for eventual
3796 // consumption by emitEpilogue.
3797 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3798 }
3799
3800 // Add argument registers to the end of the list so that they are known live
3801 // into the call.
3802 for (auto &RegToPass : RegsToPass)
3803 Ops.push_back(DAG.getRegister(RegToPass.first,
3804 RegToPass.second.getValueType()));
3805
3806 // Add a register mask operand representing the call-preserved registers.
3807 const uint32_t *Mask;
3808 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3809 if (IsThisReturn) {
3810 // For 'this' returns, use the X0-preserving mask if applicable
3811 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3812 if (!Mask) {
3813 IsThisReturn = false;
3814 Mask = TRI->getCallPreservedMask(MF, CallConv);
3815 }
3816 } else
3817 Mask = TRI->getCallPreservedMask(MF, CallConv);
3818
3819 if (Subtarget->hasCustomCallingConv())
3820 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
3821
3822 if (TRI->isAnyArgRegReserved(MF))
3823 TRI->emitReservedArgRegCallError(MF);
3824
3825 assert(Mask && "Missing call preserved mask for calling convention");
3826 Ops.push_back(DAG.getRegisterMask(Mask));
3827
3828 if (InFlag.getNode())
3829 Ops.push_back(InFlag);
3830
3831 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3832
3833 // If we're doing a tail call, use a TC_RETURN here rather than an
3834 // actual call instruction.
3835 if (IsTailCall) {
3836 MF.getFrameInfo().setHasTailCall();
3837 return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3838 }
3839
3840 // Returns a chain and a flag for retval copy to use.
3841 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3842 InFlag = Chain.getValue(1);
3843
3844 uint64_t CalleePopBytes =
3845 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3846
3847 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3848 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3849 InFlag, DL);
3850 if (!Ins.empty())
3851 InFlag = Chain.getValue(1);
3852
3853 // Handle result values, copying them out of physregs into vregs that we
3854 // return.
3855 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3856 InVals, IsThisReturn,
3857 IsThisReturn ? OutVals[0] : SDValue());
3858}
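// A rough illustration of the byval path in the loop above (assumed IR, not
// taken from this tree's tests): an outgoing argument such as
//
//   call void @callee(%struct.S* byval align 8 %tmp)
//
// is lowered to a memcpy of getByValSize() bytes from %tmp into the outgoing
// argument area at StackPtr + LocMemOffset, and the copy's chain joins
// MemOpChains so it is sequenced before the call itself.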
3859
3860bool AArch64TargetLowering::CanLowerReturn(
3861 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3862 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3863 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3864 ? RetCC_AArch64_WebKit_JS
3865 : RetCC_AArch64_AAPCS;
3866 SmallVector<CCValAssign, 16> RVLocs;
3867 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3868 return CCInfo.CheckReturn(Outs, RetCC);
3869}
3870
3871SDValue
3872AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3873 bool isVarArg,
3874 const SmallVectorImpl<ISD::OutputArg> &Outs,
3875 const SmallVectorImpl<SDValue> &OutVals,
3876 const SDLoc &DL, SelectionDAG &DAG) const {
3877 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3878 ? RetCC_AArch64_WebKit_JS
3879 : RetCC_AArch64_AAPCS;
3880 SmallVector<CCValAssign, 16> RVLocs;
3881 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3882 *DAG.getContext());
3883 CCInfo.AnalyzeReturn(Outs, RetCC);
3884
3885 // Copy the result values into the output registers.
3886 SDValue Flag;
3887 SmallVector<SDValue, 4> RetOps(1, Chain);
3888 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3889 ++i, ++realRVLocIdx) {
3890 CCValAssign &VA = RVLocs[i];
3891 assert(VA.isRegLoc() && "Can only return in registers!");
3892 SDValue Arg = OutVals[realRVLocIdx];
3893
3894 switch (VA.getLocInfo()) {
3895 default:
3896 llvm_unreachable("Unknown loc info!");
3897 case CCValAssign::Full:
3898 if (Outs[i].ArgVT == MVT::i1) {
3899 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3900 // value. This is strictly redundant on Darwin (which uses "zeroext
3901 // i1"), but will be optimised out before ISel.
3902 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3903 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3904 }
3905 break;
3906 case CCValAssign::BCvt:
3907 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3908 break;
3909 }
3910
3911 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3912 Flag = Chain.getValue(1);
3913 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3914 }
3915 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3916 const MCPhysReg *I =
3917 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3918 if (I) {
3919 for (; *I; ++I) {
3920 if (AArch64::GPR64RegClass.contains(*I))
3921 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3922 else if (AArch64::FPR64RegClass.contains(*I))
3923 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3924 else
3925 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3926 }
3927 }
3928
3929 RetOps[0] = Chain; // Update chain.
3930
3931 // Add the flag if we have it.
3932 if (Flag.getNode())
3933 RetOps.push_back(Flag);
3934
3935 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3936}
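// Sketch of the CCValAssign::Full i1 case above (assumed IR example):
//
//   define i1 @flag(i32 %x) {
//     %c = icmp eq i32 %x, 0
//     ret i1 %c
//   }
//
// The returned value is truncated back to i1 and then zero-extended to the
// location type, so the caller observes a properly zero-extended value in w0
// as AAPCS requires.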
3937
3938//===----------------------------------------------------------------------===//
3939// Other Lowering Code
3940//===----------------------------------------------------------------------===//
3941
3942SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3943 SelectionDAG &DAG,
3944 unsigned Flag) const {
3945 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
3946 N->getOffset(), Flag);
3947}
3948
3949SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3950 SelectionDAG &DAG,
3951 unsigned Flag) const {
3952 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3953}
3954
3955SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3956 SelectionDAG &DAG,
3957 unsigned Flag) const {
3958 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3959 N->getOffset(), Flag);
3960}
3961
3962SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
3963 SelectionDAG &DAG,
3964 unsigned Flag) const {
3965 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
3966}
3967
3968// (loadGOT sym)
3969template <class NodeTy>
3970SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
3971 unsigned Flags) const {
3972 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
3973 SDLoc DL(N);
3974 EVT Ty = getPointerTy(DAG.getDataLayout());
3975 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
3976 // FIXME: Once remat is capable of dealing with instructions with register
3977 // operands, expand this into two nodes instead of using a wrapper node.
3978 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
3979}
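// Illustrative expansion once the LOADgot pseudo is selected (small code
// model, ELF):
//   adrp x0, :got:sym
//   ldr  x0, [x0, :got_lo12:sym]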
3980
3981// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
3982template <class NodeTy>
3983SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
3984 unsigned Flags) const {
3985 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
3986 SDLoc DL(N);
3987 EVT Ty = getPointerTy(DAG.getDataLayout());
3988 const unsigned char MO_NC = AArch64II::MO_NC;
3989 return DAG.getNode(
3990 AArch64ISD::WrapperLarge, DL, Ty,
3991 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
3992 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
3993 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
3994 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
3995}
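// In the large code model the wrapper above is matched to a MOVZ/MOVK chain
// that materializes the full 64-bit address, roughly:
//   movz x0, #:abs_g3:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g0_nc:sym
// (illustrative; the relocation operators mirror the MO_G3..MO_G0 flags used
// above).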
3996
3997// (addlow (adrp %hi(sym)) %lo(sym))
3998template <class NodeTy>
3999SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4000 unsigned Flags) const {
4001 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4002 SDLoc DL(N);
4003 EVT Ty = getPointerTy(DAG.getDataLayout());
4004 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4005 SDValue Lo = getTargetNode(N, Ty, DAG,
4006 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4007 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4008 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4009}
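// Illustrative expansion of the ADRP/ADDlow pair built above (small code
// model):
//   adrp x0, sym             ; 4KiB page containing sym
//   add  x0, x0, :lo12:sym   ; low 12 bits of the offset within that page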
4010
4011// (adr sym)
4012template <class NodeTy>
4013SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
4014 unsigned Flags) const {
4015 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4016 SDLoc DL(N);
4017 EVT Ty = getPointerTy(DAG.getDataLayout());
4018 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4019 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4020}
4021
4022SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4023 SelectionDAG &DAG) const {
4024 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4025 const GlobalValue *GV = GN->getGlobal();
4026 unsigned char OpFlags =
4027 Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4028
4029 if (OpFlags != AArch64II::MO_NO_FLAG)
4030 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4031 "unexpected offset in global node");
4032
4033 // This also catches the large code model case for Darwin, and tiny code
4034 // model with got relocations.
4035 if ((OpFlags & AArch64II::MO_GOT) != 0) {
4036 return getGOT(GN, DAG, OpFlags);
4037 }
4038
4039 SDValue Result;
4040 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4041 Result = getAddrLarge(GN, DAG, OpFlags);
4042 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4043 Result = getAddrTiny(GN, DAG, OpFlags);
4044 } else {
4045 Result = getAddr(GN, DAG, OpFlags);
4046 }
4047 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4048 SDLoc DL(GN);
4049 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
4050 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4051 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4052 return Result;
4053}
4054
4055/// Convert a TLS address reference into the correct sequence of loads
4056/// and calls to compute the variable's address (for Darwin, currently) and
4057/// return an SDValue containing the final node.
4058
4059/// Darwin only has one TLS scheme which must be capable of dealing with the
4060/// fully general situation, in the worst case. This means:
4061/// + "extern __thread" declaration.
4062/// + Defined in a possibly unknown dynamic library.
4063///
4064/// The general system is that each __thread variable has a [3 x i64] descriptor
4065/// which contains information used by the runtime to calculate the address. The
4066/// only part of this the compiler needs to know about is the first xword, which
4067/// contains a function pointer that must be called with the address of the
4068/// entire descriptor in "x0".
4069///
4070/// Since this descriptor may be in a different unit, in general even the
4071/// descriptor must be accessed via an indirect load. The "ideal" code sequence
4072/// is:
4073/// adrp x0, _var@TLVPPAGE
4074/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
4075/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
4076/// ; the function pointer
4077/// blr x1 ; Uses descriptor address in x0
4078/// ; Address of _var is now in x0.
4079///
4080/// If the address of _var's descriptor *is* known to the linker, then it can
4081/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4082/// a slight efficiency gain.
4083SDValue
4084AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4085 SelectionDAG &DAG) const {
4086 assert(Subtarget->isTargetDarwin() &&
4087 "This function expects a Darwin target");
4088
4089 SDLoc DL(Op);
4090 MVT PtrVT = getPointerTy(DAG.getDataLayout());
4091 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4092
4093 SDValue TLVPAddr =
4094 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4095 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4096
4097 // The first entry in the descriptor is a function pointer that we must call
4098 // to obtain the address of the variable.
4099 SDValue Chain = DAG.getEntryNode();
4100 SDValue FuncTLVGet = DAG.getLoad(
4101 MVT::i64, DL, Chain, DescAddr,
4102 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4103 /* Alignment = */ 8,
4104 MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
4105 MachineMemOperand::MODereferenceable);
4106 Chain = FuncTLVGet.getValue(1);
4107
4108 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4109 MFI.setAdjustsStack(true);
4110
4111 // TLS calls preserve all registers except those that absolutely must be
4112 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4113 // silly).
4114 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4115 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4116 if (Subtarget->hasCustomCallingConv())
4117 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4118
4119 // Finally, we can make the call. This is just a degenerate version of a
4120 // normal AArch64 call node: x0 takes the address of the descriptor, and
4121 // returns the address of the variable in this thread.
4122 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4123 Chain =
4124 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4125 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4126 DAG.getRegisterMask(Mask), Chain.getValue(1));
4127 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4128}
4129
4130/// When accessing thread-local variables under either the general-dynamic or
4131/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4132/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4133/// is a function pointer to carry out the resolution.
4134///
4135/// The sequence is:
4136/// adrp x0, :tlsdesc:var
4137/// ldr x1, [x0, #:tlsdesc_lo12:var]
4138/// add x0, x0, #:tlsdesc_lo12:var
4139/// .tlsdesccall var
4140/// blr x1
4141/// (TPIDR_EL0 offset now in x0)
4142///
4143/// The above sequence must be produced unscheduled, to enable the linker to
4144/// optimize/relax this sequence.
4145/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4146/// above sequence, and expanded really late in the compilation flow, to ensure
4147/// the sequence is produced as per above.
4148SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4149 const SDLoc &DL,
4150 SelectionDAG &DAG) const {
4151 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4152
4153 SDValue Chain = DAG.getEntryNode();
4154 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4155
4156 Chain =
4157 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4158 SDValue Glue = Chain.getValue(1);
4159
4160 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4161}
4162
4163SDValue
4164AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4165 SelectionDAG &DAG) const {
4166 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4167 if (getTargetMachine().getCodeModel() == CodeModel::Large)
4168 report_fatal_error("ELF TLS only supported in small memory model");
4169 // Different choices can be made for the maximum size of the TLS area for a
4170 // module. For the small address model, the default TLS size is 16MiB and the
4171 // maximum TLS size is 4GiB.
4172 // FIXME: add -mtls-size command line option and make it control the 16MiB
4173 // vs. 4GiB code sequence generation.
4174 // FIXME: add tiny codemodel support. We currently generate the same code as
4175 // small, which may be larger than needed.
4176 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4177
4178 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4179
4180 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4181 if (Model == TLSModel::LocalDynamic)
4182 Model = TLSModel::GeneralDynamic;
4183 }
4184
4185 SDValue TPOff;
4186 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4187 SDLoc DL(Op);
4188 const GlobalValue *GV = GA->getGlobal();
4189
4190 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4191
4192 if (Model == TLSModel::LocalExec) {
4193 SDValue HiVar = DAG.getTargetGlobalAddress(
4194 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4195 SDValue LoVar = DAG.getTargetGlobalAddress(
4196 GV, DL, PtrVT, 0,
4197 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4198
4199 SDValue TPWithOff_lo =
4200 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4201 HiVar,
4202 DAG.getTargetConstant(0, DL, MVT::i32)),
4203 0);
4204 SDValue TPWithOff =
4205 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
4206 LoVar,
4207 DAG.getTargetConstant(0, DL, MVT::i32)),
4208 0);
4209 return TPWithOff;
4210 } else if (Model == TLSModel::InitialExec) {
4211 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4212 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4213 } else if (Model == TLSModel::LocalDynamic) {
4214 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
4215 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
4216 // the beginning of the module's TLS region, followed by a DTPREL offset
4217 // calculation.
4218
4219 // These accesses will need deduplicating if there's more than one.
4220 AArch64FunctionInfo *MFI =
4221 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4222 MFI->incNumLocalDynamicTLSAccesses();
4223
4224 // The call needs a relocation too for linker relaxation. It doesn't make
4225 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4226 // the address.
4227 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4228 AArch64II::MO_TLS);
4229
4230 // Now we can calculate the offset from TPIDR_EL0 to this module's
4231 // thread-local area.
4232 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4233
4234 // Now use :dtprel_whatever: operations to calculate this variable's offset
4235 // in its thread-storage area.
4236 SDValue HiVar = DAG.getTargetGlobalAddress(
4237 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4238 SDValue LoVar = DAG.getTargetGlobalAddress(
4239 GV, DL, MVT::i64, 0,
4240 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4241
4242 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4243 DAG.getTargetConstant(0, DL, MVT::i32)),
4244 0);
4245 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4246 DAG.getTargetConstant(0, DL, MVT::i32)),
4247 0);
4248 } else if (Model == TLSModel::GeneralDynamic) {
4249 // The call needs a relocation too for linker relaxation. It doesn't make
4250 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4251 // the address.
4252 SDValue SymAddr =
4253 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4254
4255 // Finally we can make a call to calculate the offset from tpidr_el0.
4256 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4257 } else
4258 llvm_unreachable("Unsupported ELF TLS access model");
4259
4260 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4261}
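// Rough shape of the non-TLSDESC sequences produced above (illustrative only):
//   Local-exec:
//     mrs  x0, TPIDR_EL0
//     add  x0, x0, #:tprel_hi12:var, lsl #12
//     add  x0, x0, #:tprel_lo12_nc:var
//   Initial-exec:
//     adrp x1, :gottprel:var
//     ldr  x1, [x1, #:gottprel_lo12:var]
//     mrs  x0, TPIDR_EL0
//     add  x0, x0, x1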
4262
4263SDValue
4264AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
4265 SelectionDAG &DAG) const {
4266 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
4267
4268 SDValue Chain = DAG.getEntryNode();
4269 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4270 SDLoc DL(Op);
4271
4272 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
4273
4274 // Load the ThreadLocalStoragePointer from the TEB
4275 // A pointer to the TLS array is located at offset 0x58 from the TEB.
4276 SDValue TLSArray =
4277 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
4278 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
4279 Chain = TLSArray.getValue(1);
4280
4281 // Load the TLS index from the C runtime;
4282 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4283 // This also does the same as LOADgot, but using a generic i32 load,
4284 // while LOADgot only loads i64.
4285 SDValue TLSIndexHi =
4286 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4287 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4288 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4289 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4290 SDValue TLSIndex =
4291 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4292 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4293 Chain = TLSIndex.getValue(1);
4294
4295 // The pointer to the thread's TLS data area is located at an offset of
4296 // TLSIndex * 8 into the TLSArray.
4297 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4298 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4299 DAG.getConstant(3, DL, PtrVT));
4300 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4301 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4302 MachinePointerInfo());
4303 Chain = TLS.getValue(1);
4304
4305 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4306 const GlobalValue *GV = GA->getGlobal();
4307 SDValue TGAHi = DAG.getTargetGlobalAddress(
4308 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4309 SDValue TGALo = DAG.getTargetGlobalAddress(
4310 GV, DL, PtrVT, 0,
4311 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4312
4313 // Add the offset from the start of the .tls section (section base).
4314 SDValue Addr =
4315 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4316 DAG.getTargetConstant(0, DL, MVT::i32)),
4317 0);
4318 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4319 return Addr;
4320}
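// The computation above corresponds roughly to this pseudo-C (illustrative;
// SectionRelOffset stands in for the TGAHi/TGALo section-relative offset):
//   char **TlsArray = *(char ***)(TEB + 0x58);        // TEB lives in x18
//   char  *TlsBase  = TlsArray[_tls_index];           // i32 load, scaled by 8
//   void  *Addr     = TlsBase + SectionRelOffset(var);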
4321
4322SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
4323 SelectionDAG &DAG) const {
4324 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4325 if (DAG.getTarget().useEmulatedTLS())
4326 return LowerToTLSEmulatedModel(GA, DAG);
4327
4328 if (Subtarget->isTargetDarwin())
4329 return LowerDarwinGlobalTLSAddress(Op, DAG);
4330 if (Subtarget->isTargetELF())
4331 return LowerELFGlobalTLSAddress(Op, DAG);
4332 if (Subtarget->isTargetWindows())
4333 return LowerWindowsGlobalTLSAddress(Op, DAG);
4334
4335 llvm_unreachable("Unexpected platform trying to use TLS");
4336}
4337
4338SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4339 SDValue Chain = Op.getOperand(0);
4340 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4341 SDValue LHS = Op.getOperand(2);
4342 SDValue RHS = Op.getOperand(3);
4343 SDValue Dest = Op.getOperand(4);
4344 SDLoc dl(Op);
4345
4346 MachineFunction &MF = DAG.getMachineFunction();
4347 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
4348 // will not be produced, as they are conditional branch instructions that do
4349 // not set flags.
4350 bool ProduceNonFlagSettingCondBr =
4351 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
4352
4353 // Handle f128 first, since lowering it will result in comparing the return
4354 // value of a libcall against zero, which is just what the rest of LowerBR_CC
4355 // is expecting to deal with.
4356 if (LHS.getValueType() == MVT::f128) {
4357 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4358
4359 // If softenSetCCOperands returned a scalar, we need to compare the result
4360 // against zero to select between true and false values.
4361 if (!RHS.getNode()) {
4362 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4363 CC = ISD::SETNE;
4364 }
4365 }
4366
4367 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4368 // instruction.
4369 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4370 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4371 // Only lower legal XALUO ops.
4372 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4373 return SDValue();
4374
4375 // The actual operation with overflow check.
4376 AArch64CC::CondCode OFCC;
4377 SDValue Value, Overflow;
4378 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4379
4380 if (CC == ISD::SETNE)
4381 OFCC = getInvertedCondCode(OFCC);
4382 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4383
4384 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4385 Overflow);
4386 }
4387
4388 if (LHS.getValueType().isInteger()) {
4389 assert((LHS.getValueType() == RHS.getValueType()) &&
4390 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4391
4392 // If the RHS of the comparison is zero, we can potentially fold this
4393 // to a specialized branch.
4394 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4395 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
4396 if (CC == ISD::SETEQ) {
4397 // See if we can use a TBZ to fold in an AND as well.
4398 // TBZ has a smaller branch displacement than CBZ. If the offset is
4399 // out of bounds, a late MI-layer pass rewrites branches.
4400 // 403.gcc is an example that hits this case.
4401 if (LHS.getOpcode() == ISD::AND &&
4402 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4403 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4404 SDValue Test = LHS.getOperand(0);
4405 uint64_t Mask = LHS.getConstantOperandVal(1);
4406 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4407 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4408 Dest);
4409 }
4410
4411 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4412 } else if (CC == ISD::SETNE) {
4413 // See if we can use a TBZ to fold in an AND as well.
4414 // TBZ has a smaller branch displacement than CBZ. If the offset is
4415 // out of bounds, a late MI-layer pass rewrites branches.
4416 // 403.gcc is an example that hits this case.
4417 if (LHS.getOpcode() == ISD::AND &&
4418 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4419 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4420 SDValue Test = LHS.getOperand(0);
4421 uint64_t Mask = LHS.getConstantOperandVal(1);
4422 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4423 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4424 Dest);
4425 }
4426
4427 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4428 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4429 // Don't combine AND since emitComparison converts the AND to an ANDS
4430 // (a.k.a. TST) and the test in the test bit and branch instruction
4431 // becomes redundant. This would also increase register pressure.
4432 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4433 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4434 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4435 }
4436 }
4437 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4438 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
4439 // Don't combine AND since emitComparison converts the AND to an ANDS
4440 // (a.k.a. TST) and the test in the test bit and branch instruction
4441 // becomes redundant. This would also increase register pressure.
4442 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4443 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4444 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4445 }
4446
4447 SDValue CCVal;
4448 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4449 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4450 Cmp);
4451 }
4452
4453 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4454 LHS.getValueType() == MVT::f64);
4455
4456 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4457 // clean. Some of them require two branches to implement.
4458 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4459 AArch64CC::CondCode CC1, CC2;
4460 changeFPCCToAArch64CC(CC, CC1, CC2);
4461 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4462 SDValue BR1 =
4463 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4464 if (CC2 != AArch64CC::AL) {
4465 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4466 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4467 Cmp);
4468 }
4469
4470 return BR1;
4471}
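// Worked examples of the compare-against-zero folds above (illustrative, for a
// 64-bit value in x0):
//   if ((x & 8) == 0) goto L;   =>  tbz  x0, #3, L    // AND of a power of two
//   if (x != 0)       goto L;   =>  cbnz x0, L        // SETNE against zero
//   if (x < 0)        goto L;   =>  tbnz x0, #63, L   // SETLT tests the sign bit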
4472
4473SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4474 SelectionDAG &DAG) const {
4475 EVT VT = Op.getValueType();
4476 SDLoc DL(Op);
4477
4478 SDValue In1 = Op.getOperand(0);
4479 SDValue In2 = Op.getOperand(1);
4480 EVT SrcVT = In2.getValueType();
4481
4482 if (SrcVT.bitsLT(VT))
4483 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4484 else if (SrcVT.bitsGT(VT))
4485 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4486
4487 EVT VecVT;
4488 uint64_t EltMask;
4489 SDValue VecVal1, VecVal2;
4490
4491 auto setVecVal = [&] (int Idx) {
4492 if (!VT.isVector()) {
4493 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4494 DAG.getUNDEF(VecVT), In1);
4495 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4496 DAG.getUNDEF(VecVT), In2);
4497 } else {
4498 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4499 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4500 }
4501 };
4502
4503 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4504 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4505 EltMask = 0x80000000ULL;
4506 setVecVal(AArch64::ssub);
4507 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4508 VecVT = MVT::v2i64;
4509
4510 // We want to materialize a mask with the high bit set, but the AdvSIMD
4511 // immediate moves cannot materialize that in a single instruction for
4512 // 64-bit elements. Instead, materialize zero and then negate it.
4513 EltMask = 0;
4514
4515 setVecVal(AArch64::dsub);
4516 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4517 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4518 EltMask = 0x8000ULL;
4519 setVecVal(AArch64::hsub);
4520 } else {
4521 llvm_unreachable("Invalid type for copysign!");
4522 }
4523
4524 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4525
4526 // If we couldn't materialize the mask above, then the mask vector will be
4527 // the zero vector, and we need to negate it here.
4528 if (VT == MVT::f64 || VT == MVT::v2f64) {
4529 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4530 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4531 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4532 }
4533
4534 SDValue Sel =
4535 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4536
4537 if (VT == MVT::f16)
4538 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4539 if (VT == MVT::f32)
4540 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4541 else if (VT == MVT::f64)
4542 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4543 else
4544 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4545}
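// For f64 the sign-bit mask 0x8000000000000000 cannot be encoded as an AdvSIMD
// immediate, so the code above materializes +0.0 and negates it (the bit
// pattern of -0.0 is exactly the sign bit), then BIT copies only that bit from
// In2 into In1. Roughly (illustrative):
//   movi v2.2d, #0
//   fneg v2.2d, v2.2d
//   bit  v0.16b, v1.16b, v2.16b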
4546
4547SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4548 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
4549 Attribute::NoImplicitFloat))
4550 return SDValue();
4551
4552 if (!Subtarget->hasNEON())
4553 return SDValue();
4554
4555 // While there is no integer popcount instruction, it can
4556 // be more efficiently lowered to the following sequence that uses
4557 // AdvSIMD registers/instructions as long as the copies to/from
4558 // the AdvSIMD registers are cheap.
4559 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4560 // CNT V0.8B, V0.8B // 8xbyte pop-counts
4561 // ADDV B0, V0.8B // sum 8xbyte pop-counts
4562 // UMOV X0, V0.B[0] // copy byte result back to integer reg
4563 SDValue Val = Op.getOperand(0);
4564 SDLoc DL(Op);
4565 EVT VT = Op.getValueType();
4566
4567 if (VT == MVT::i32 || VT == MVT::i64) {
4568 if (VT == MVT::i32)
4569 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4570 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4571
4572 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4573 SDValue UaddLV = DAG.getNode(
4574 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4575 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4576
4577 if (VT == MVT::i64)
4578 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4579 return UaddLV;
4580 }
4581
4582 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
4583 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
4584 "Unexpected type for custom ctpop lowering");
4585
4586 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4587 Val = DAG.getBitcast(VT8Bit, Val);
4588 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
4589
4590 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
4591 unsigned EltSize = 8;
4592 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
4593 while (EltSize != VT.getScalarSizeInBits()) {
4594 EltSize *= 2;
4595 NumElts /= 2;
4596 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
4597 Val = DAG.getNode(
4598 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
4599 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
4600 }
4601
4602 return Val;
4603}
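// Illustrative expansion of the vector widening loop for a v4i32 popcount:
//   cnt    v0.16b, v0.16b   // per-byte pop-counts
//   uaddlp v0.8h,  v0.16b   // pairwise add bytes into halfwords
//   uaddlp v0.4s,  v0.8h    // pairwise add halfwords into words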
4604
4605SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4606
4607 if (Op.getValueType().isVector())
4608 return LowerVSETCC(Op, DAG);
4609
4610 SDValue LHS = Op.getOperand(0);
4611 SDValue RHS = Op.getOperand(1);
4612 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4613 SDLoc dl(Op);
4614
4615 // We chose ZeroOrOneBooleanContents, so use zero and one.
4616 EVT VT = Op.getValueType();
4617 SDValue TVal = DAG.getConstant(1, dl, VT);
4618 SDValue FVal = DAG.getConstant(0, dl, VT);
4619
4620 // Handle f128 first, since one possible outcome is a normal integer
4621 // comparison which gets picked up by the next if statement.
4622 if (LHS.getValueType() == MVT::f128) {
4623 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4624
4625 // If softenSetCCOperands returned a scalar, use it.
4626 if (!RHS.getNode()) {
4627 assert(LHS.getValueType() == Op.getValueType() &&
4628 "Unexpected setcc expansion!");
4629 return LHS;
4630 }
4631 }
4632
4633 if (LHS.getValueType().isInteger()) {
4634 SDValue CCVal;
4635 SDValue Cmp =
4636 getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4637
4638 // Note that we inverted the condition above, so we reverse the order of
4639 // the true and false operands here. This will allow the setcc to be
4640 // matched to a single CSINC instruction.
4641 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4642 }
4643
4644 // Now we know we're dealing with FP values.
4645 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4646 LHS.getValueType() == MVT::f64);
4647
4648 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4649 // and do the comparison.
4650 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4651
4652 AArch64CC::CondCode CC1, CC2;
4653 changeFPCCToAArch64CC(CC, CC1, CC2);
4654 if (CC2 == AArch64CC::AL) {
4655 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4656 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4657
4658 // Note that we inverted the condition above, so we reverse the order of
4659 // the true and false operands here. This will allow the setcc to be
4660 // matched to a single CSINC instruction.
4661 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4662 } else {
4663 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4664 // totally clean. Some of them require two CSELs to implement. When that
4665 // happens, we emit the first CSEL and then a second one using the output
4666 // of the first as the RHS. We're effectively OR'ing the two CC's together.
4667
4668 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4669 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4670 SDValue CS1 =
4671 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4672
4673 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4674 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4675 }
4676}
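// Example of the inverted-condition trick above (illustrative): for "x == 0"
// the condition is inverted to NE and the true/false operands swapped, giving
// CSEL 0, 1, ne, (cmp x, 0), which matches a single CSINC and prints as:
//   cmp  x0, #0
//   cset w0, eq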
4677
4678SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4679 SDValue RHS, SDValue TVal,
4680 SDValue FVal, const SDLoc &dl,
4681 SelectionDAG &DAG) const {
4682 // Handle f128 first, because it will result in a comparison of some RTLIB
4683 // call result against zero.
4684 if (LHS.getValueType() == MVT::f128) {
4685 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4686
4687 // If softenSetCCOperands returned a scalar, we need to compare the result
4688 // against zero to select between true and false values.
4689 if (!RHS.getNode()) {
4690 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4691 CC = ISD::SETNE;
4692 }
4693 }
4694
4695 // Also handle f16, for which we need to do a f32 comparison.
4696 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4697 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4698 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4699 }
4700
4701 // Next, handle integers.
4702 if (LHS.getValueType().isInteger()) {
4703 assert((LHS.getValueType() == RHS.getValueType()) &&
4704 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4705
4706 unsigned Opcode = AArch64ISD::CSEL;
4707
4708 // If both the TVal and the FVal are constants, see if we can swap them in
4709 // order to form a CSINV or CSINC out of them.
4710 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4711 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4712
4713 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4714 std::swap(TVal, FVal);
4715 std::swap(CTVal, CFVal);
4716 CC = ISD::getSetCCInverse(CC, true);
4717 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4718 std::swap(TVal, FVal);
4719 std::swap(CTVal, CFVal);
4720 CC = ISD::getSetCCInverse(CC, true);
4721 } else if (TVal.getOpcode() == ISD::XOR) {
4722 // If TVal is a NOT we want to swap TVal and FVal so that we can match
4723 // with a CSINV rather than a CSEL.
4724 if (isAllOnesConstant(TVal.getOperand(1))) {
4725 std::swap(TVal, FVal);
4726 std::swap(CTVal, CFVal);
4727 CC = ISD::getSetCCInverse(CC, true);
4728 }
4729 } else if (TVal.getOpcode() == ISD::SUB) {
4730 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4731 // that we can match with a CSNEG rather than a CSEL.
4732 if (isNullConstant(TVal.getOperand(0))) {
4733 std::swap(TVal, FVal);
4734 std::swap(CTVal, CFVal);
4735 CC = ISD::getSetCCInverse(CC, true);
4736 }
4737 } else if (CTVal && CFVal) {
4738 const int64_t TrueVal = CTVal->getSExtValue();
4739 const int64_t FalseVal = CFVal->getSExtValue();
4740 bool Swap = false;
4741
4742 // If both TVal and FVal are constants, see if FVal is the
4743 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4744 // instead of a CSEL in that case.
4745 if (TrueVal == ~FalseVal) {
4746 Opcode = AArch64ISD::CSINV;
4747 } else if (TrueVal == -FalseVal) {
4748 Opcode = AArch64ISD::CSNEG;
4749 } else if (TVal.getValueType() == MVT::i32) {
4750 // If our operands are only 32-bit wide, make sure we use 32-bit
4751 // arithmetic for the check whether we can use CSINC. This ensures that
4752 // the addition in the check will wrap around properly in case there is
4753 // an overflow (which would not be the case if we do the check with
4754 // 64-bit arithmetic).
4755 const uint32_t TrueVal32 = CTVal->getZExtValue();
4756 const uint32_t FalseVal32 = CFVal->getZExtValue();
4757
4758 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4759 Opcode = AArch64ISD::CSINC;
4760
4761 if (TrueVal32 > FalseVal32) {
4762 Swap = true;
4763 }
4764 }
4765 // 64-bit check whether we can use CSINC.
4766 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4767 Opcode = AArch64ISD::CSINC;
4768
4769 if (TrueVal > FalseVal) {
4770 Swap = true;
4771 }
4772 }
4773
4774 // Swap TVal and FVal if necessary.
4775 if (Swap) {
4776 std::swap(TVal, FVal);
4777 std::swap(CTVal, CFVal);
4778 CC = ISD::getSetCCInverse(CC, true);
4779 }
4780
4781 if (Opcode != AArch64ISD::CSEL) {
4782 // Drop FVal since we can get its value by simply inverting/negating
4783 // TVal.
4784 FVal = TVal;
4785 }
4786 }
4787
4788 // Avoid materializing a constant when possible by reusing a known value in
4789 // a register. However, don't perform this optimization if the known value
4790 // is one, zero or negative one in the case of a CSEL. We can always
4791 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4792 // FVal, respectively.
4793 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4794 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4795 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4796 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4797 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4798 // "a != C ? x : a" to avoid materializing C.
4799 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4800 TVal = LHS;
4801 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4802 FVal = LHS;
4803 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4804 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
4805 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4806 // avoid materializing C.
4807 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4808 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4809 Opcode = AArch64ISD::CSINV;
4810 TVal = LHS;
4811 FVal = DAG.getConstant(0, dl, FVal.getValueType());
4812 }
4813 }
4814
4815 SDValue CCVal;
4816 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4817 EVT VT = TVal.getValueType();
4818 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4819 }
4820
4821 // Now we know we're dealing with FP values.
4822 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4823 LHS.getValueType() == MVT::f64);
4824 assert(LHS.getValueType() == RHS.getValueType());
4825 EVT VT = TVal.getValueType();
4826 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4827
4828 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4829 // clean. Some of them require two CSELs to implement.
4830 AArch64CC::CondCode CC1, CC2;
4831 changeFPCCToAArch64CC(CC, CC1, CC2);
4832
4833 if (DAG.getTarget().Options.UnsafeFPMath) {
4834 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4835 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4836 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4837 if (RHSVal && RHSVal->isZero()) {
4838 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4839 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4840
4841 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4842 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4843 TVal = LHS;
4844 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4845 CFVal && CFVal->isZero() &&
4846 FVal.getValueType() == LHS.getValueType())
4847 FVal = LHS;
4848 }
4849 }
4850
4851 // Emit first, and possibly only, CSEL.
4852 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4853 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4854
4855 // If we need a second CSEL, emit it, using the output of the first as the
4856 // RHS. We're effectively OR'ing the two CC's together.
4857 if (CC2 != AArch64CC::AL) {
4858 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4859 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4860 }
4861
4862 // Otherwise, return the output of the first CSEL.
4863 return CS1;
4864}
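// [Editor's sketch, not part of AArch64ISelLowering.cpp] A hedged source-level
// example of the constant-reuse transform documented above: when the compared
// constant equals the selected constant, the lowering can feed the compared
// register into the CSEL instead of materializing C a second time. Names are
// illustrative; the exact instructions emitted depend on the surrounding DAG.
long select_reuse_constant(long a, long x) {
  return a == 1234 ? 1234 : x;   // "a == C ? C : x"  ->  "a == C ? a : x"
}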
4865
4866SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4867 SelectionDAG &DAG) const {
4868 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4869 SDValue LHS = Op.getOperand(0);
4870 SDValue RHS = Op.getOperand(1);
4871 SDValue TVal = Op.getOperand(2);
4872 SDValue FVal = Op.getOperand(3);
4873 SDLoc DL(Op);
4874 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4875}
4876
4877SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4878 SelectionDAG &DAG) const {
4879 SDValue CCVal = Op->getOperand(0);
4880 SDValue TVal = Op->getOperand(1);
4881 SDValue FVal = Op->getOperand(2);
4882 SDLoc DL(Op);
4883
4884 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4885 // instruction.
4886 if (isOverflowIntrOpRes(CCVal)) {
4887 // Only lower legal XALUO ops.
4888 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4889 return SDValue();
4890
4891 AArch64CC::CondCode OFCC;
4892 SDValue Value, Overflow;
4893 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4894 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4895
4896 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4897 CCVal, Overflow);
4898 }
4899
4900 // Lower it the same way as we would lower a SELECT_CC node.
4901 ISD::CondCode CC;
4902 SDValue LHS, RHS;
4903 if (CCVal.getOpcode() == ISD::SETCC) {
4904 LHS = CCVal.getOperand(0);
4905 RHS = CCVal.getOperand(1);
4906 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4907 } else {
4908 LHS = CCVal;
4909 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4910 CC = ISD::SETNE;
4911 }
4912 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4913}
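// [Editor's sketch] A hedged example of the overflow-intrinsic-feeding-a-select
// pattern the fast path above targets; the select can then consume the flags
// produced by the overflow operation directly via CSEL. __builtin_saddl_overflow
// is the GCC/Clang builtin, used here purely for illustration.
long saturating_add(long a, long b) {
  long sum;
  return __builtin_saddl_overflow(a, b, &sum) ? 0x7fffffffffffffffL : sum;
}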
4914
4915SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4916 SelectionDAG &DAG) const {
4917 // Jump table entries are PC-relative offsets. No additional tweaking
4918 // is necessary here; just get the address of the jump table.
4919 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4920
4921 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4922 !Subtarget->isTargetMachO()) {
4923 return getAddrLarge(JT, DAG);
4924 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4925 return getAddrTiny(JT, DAG);
4926 }
4927 return getAddr(JT, DAG);
4928}
4929
4930SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
4931 SelectionDAG &DAG) const {
4932 // Jump table entries are PC-relative offsets. No additional tweaking
4933 // is necessary here; just get the address of the jump table.
4934 SDLoc DL(Op);
4935 SDValue JT = Op.getOperand(1);
4936 SDValue Entry = Op.getOperand(2);
4937 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
4938
4939 SDNode *Dest =
4940 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
4941 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
4942 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
4943 SDValue(Dest, 0));
4944}
4945
4946SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4947 SelectionDAG &DAG) const {
4948 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4949
4950 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4951 // Use the GOT for the large code model on iOS.
4952 if (Subtarget->isTargetMachO()) {
4953 return getGOT(CP, DAG);
4954 }
4955 return getAddrLarge(CP, DAG);
4956 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4957 return getAddrTiny(CP, DAG);
4958 } else {
4959 return getAddr(CP, DAG);
4960 }
4961}
4962
4963SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4964 SelectionDAG &DAG) const {
4965 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
4966 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4967 !Subtarget->isTargetMachO()) {
4968 return getAddrLarge(BA, DAG);
4969 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4970 return getAddrTiny(BA, DAG);
4971 }
4972 return getAddr(BA, DAG);
4973}
4974
4975SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4976 SelectionDAG &DAG) const {
4977 AArch64FunctionInfo *FuncInfo =
4978 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4979
4980 SDLoc DL(Op);
4981 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4982 getPointerTy(DAG.getDataLayout()));
4983 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4984 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4985 MachinePointerInfo(SV));
4986}
4987
4988SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
4989 SelectionDAG &DAG) const {
4990 AArch64FunctionInfo *FuncInfo =
4991 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4992
4993 SDLoc DL(Op);
4994 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
4995 ? FuncInfo->getVarArgsGPRIndex()
4996 : FuncInfo->getVarArgsStackIndex(),
4997 getPointerTy(DAG.getDataLayout()));
4998 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4999 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5000 MachinePointerInfo(SV));
5001}
5002
5003SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
5004 SelectionDAG &DAG) const {
5005 // The layout of the va_list struct is specified in the AArch64 Procedure Call
5006 // Standard, section B.3.
5007 MachineFunction &MF = DAG.getMachineFunction();
5008 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5009 auto PtrVT = getPointerTy(DAG.getDataLayout());
5010 SDLoc DL(Op);
5011
5012 SDValue Chain = Op.getOperand(0);
5013 SDValue VAList = Op.getOperand(1);
5014 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5015 SmallVector<SDValue, 4> MemOps;
5016
5017 // void *__stack at offset 0
5018 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
5019 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
5020 MachinePointerInfo(SV), /* Alignment = */ 8));
5021
5022 // void *__gr_top at offset 8
5023 int GPRSize = FuncInfo->getVarArgsGPRSize();
5024 if (GPRSize > 0) {
5025 SDValue GRTop, GRTopAddr;
5026
5027 GRTopAddr =
5028 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
5029
5030 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
5031 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
5032 DAG.getConstant(GPRSize, DL, PtrVT));
5033
5034 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
5035 MachinePointerInfo(SV, 8),
5036 /* Alignment = */ 8));
5037 }
5038
5039 // void *__vr_top at offset 16
5040 int FPRSize = FuncInfo->getVarArgsFPRSize();
5041 if (FPRSize > 0) {
5042 SDValue VRTop, VRTopAddr;
5043 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5044 DAG.getConstant(16, DL, PtrVT));
5045
5046 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
5047 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
5048 DAG.getConstant(FPRSize, DL, PtrVT));
5049
5050 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
5051 MachinePointerInfo(SV, 16),
5052 /* Alignment = */ 8));
5053 }
5054
5055 // int __gr_offs at offset 24
5056 SDValue GROffsAddr =
5057 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
5058 MemOps.push_back(DAG.getStore(
5059 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
5060 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
5061
5062 // int __vr_offs at offset 28
5063 SDValue VROffsAddr =
5064 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
5065 MemOps.push_back(DAG.getStore(
5066 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
5067 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
5068
5069 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5070}
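// [Editor's sketch] The five stores above populate the AAPCS64 va_list record.
// As a C-level reference only (layout per the "section B.3" comment above and
// the offsets used in the stores; the type name is illustrative):
struct aapcs64_va_list {
  void *__stack;   // offset 0:  next stacked argument
  void *__gr_top;  // offset 8:  end of the saved general-register area
  void *__vr_top;  // offset 16: end of the saved FP/SIMD-register area
  int __gr_offs;   // offset 24: initialized to -GPRSize, counts up toward 0
  int __vr_offs;   // offset 28: initialized to -FPRSize, counts up toward 0
};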
5071
5072SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
5073 SelectionDAG &DAG) const {
5074 MachineFunction &MF = DAG.getMachineFunction();
5075
5076 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
5077 return LowerWin64_VASTART(Op, DAG);
5078 else if (Subtarget->isTargetDarwin())
5079 return LowerDarwin_VASTART(Op, DAG);
5080 else
5081 return LowerAAPCS_VASTART(Op, DAG);
5082}
5083
5084SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
5085 SelectionDAG &DAG) const {
5086 // AAPCS va_lists have three pointers and two ints (= 32 bytes); Darwin and
5087 // Windows use a single pointer (8 bytes), matching VaListSize below.
5088 SDLoc DL(Op);
5089 unsigned VaListSize =
5090 Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
5091 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
5092 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
5093
5094 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
5095 Op.getOperand(2),
5096 DAG.getConstant(VaListSize, DL, MVT::i32),
5097 8, false, false, false, MachinePointerInfo(DestSV),
5098 MachinePointerInfo(SrcSV));
5099}
5100
5101SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
5102 assert(Subtarget->isTargetDarwin() &&
5103        "automatic va_arg instruction only works on Darwin");
5104
5105 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5106 EVT VT = Op.getValueType();
5107 SDLoc DL(Op);
5108 SDValue Chain = Op.getOperand(0);
5109 SDValue Addr = Op.getOperand(1);
5110 unsigned Align = Op.getConstantOperandVal(3);
5111 auto PtrVT = getPointerTy(DAG.getDataLayout());
5112
5113 SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
5114 Chain = VAList.getValue(1);
5115
5116 if (Align > 8) {
5117 assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
5118 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5119 DAG.getConstant(Align - 1, DL, PtrVT));
5120 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
5121 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
5122 }
5123
5124 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5125 uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
5126
5127 // Scalar integer and FP values smaller than 64 bits are implicitly extended
5128 // up to 64 bits. At the very least, we have to increase the striding of the
5129 // vaargs list to match this, and for FP values we need to introduce
5130 // FP_ROUND nodes as well.
5131 if (VT.isInteger() && !VT.isVector())
5132 ArgSize = 8;
5133 bool NeedFPTrunc = false;
5134 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
5135 ArgSize = 8;
5136 NeedFPTrunc = true;
5137 }
5138
5139 // Increment the pointer, VAList, to the next vaarg
5140 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5141 DAG.getConstant(ArgSize, DL, PtrVT));
5142 // Store the incremented VAList to the legalized pointer
5143 SDValue APStore =
5144 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
5145
5146 // Load the actual argument out of the pointer VAList
5147 if (NeedFPTrunc) {
5148 // Load the value as an f64.
5149 SDValue WideFP =
5150 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
5151 // Round the value down to an f32.
5152 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
5153 DAG.getIntPtrConstant(1, DL));
5154 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
5155 // Merge the rounded value with the chain output of the load.
5156 return DAG.getMergeValues(Ops, DL);
5157 }
5158
5159 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
5160}
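// [Editor's sketch] A hedged C rendering of the Darwin va_arg sequence built
// above (uintptr_t from <cstdint> assumed; names are illustrative): align the
// cursor if an over-aligned type was requested, advance it by one 8-byte slot,
// and load the value. For f32 the DAG code instead loads an f64 and FP_ROUNDs.
long darwin_va_arg_long(char **ap, unsigned align) {
  char *p = *ap;
  if (align > 8)                                   // round up to 'align'
    p = (char *)(((uintptr_t)p + align - 1) & -(uintptr_t)align);
  *ap = p + 8;                                     // scalars use 8-byte slots
  return *(long *)p;
}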
5161
5162SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
5163 SelectionDAG &DAG) const {
5164 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5165 MFI.setFrameAddressIsTaken(true);
5166
5167 EVT VT = Op.getValueType();
5168 SDLoc DL(Op);
5169 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5170 SDValue FrameAddr =
5171 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5172 while (Depth--)
5173 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
5174 MachinePointerInfo());
5175 return FrameAddr;
5176}
5177
5178SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
5179 SelectionDAG &DAG) const {
5180 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5181
5182 EVT VT = getPointerTy(DAG.getDataLayout());
5183 SDLoc DL(Op);
5184 int FI = MFI.CreateFixedObject(4, 0, false);
5185 return DAG.getFrameIndex(FI, VT);
5186}
5187
5188// FIXME? Maybe this could be a TableGen attribute on some registers and
5189// this table could be generated automatically from RegInfo.
5190unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
5191 SelectionDAG &DAG) const {
5192 unsigned Reg = StringSwitch<unsigned>(RegName)
5193 .Case("sp", AArch64::SP)
5194 .Case("x1", AArch64::X1)
5195 .Case("w1", AArch64::W1)
5196 .Case("x2", AArch64::X2)
5197 .Case("w2", AArch64::W2)
5198 .Case("x3", AArch64::X3)
5199 .Case("w3", AArch64::W3)
5200 .Case("x4", AArch64::X4)
5201 .Case("w4", AArch64::W4)
5202 .Case("x5", AArch64::X5)
5203 .Case("w5", AArch64::W5)
5204 .Case("x6", AArch64::X6)
5205 .Case("w6", AArch64::W6)
5206 .Case("x7", AArch64::X7)
5207 .Case("w7", AArch64::W7)
5208 .Case("x18", AArch64::X18)
5209 .Case("w18", AArch64::W18)
5210 .Case("x20", AArch64::X20)
5211 .Case("w20", AArch64::W20)
5212 .Default(0);
5213 if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
5214 !Subtarget->isXRegisterReserved(1)) ||
5215 ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
5216 !Subtarget->isXRegisterReserved(2)) ||
5217 ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
5218 !Subtarget->isXRegisterReserved(3)) ||
5219 ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
5220 !Subtarget->isXRegisterReserved(4)) ||
5221 ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
5222 !Subtarget->isXRegisterReserved(5)) ||
5223 ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
5224 !Subtarget->isXRegisterReserved(6)) ||
5225 ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
5226 !Subtarget->isXRegisterReserved(7)) ||
5227 ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
5228 !Subtarget->isXRegisterReserved(18)) ||
5229 ((Reg == AArch64::X20 || Reg == AArch64::W20) &&
5230 !Subtarget->isXRegisterReserved(20)))
5231 Reg = 0;
5232 if (Reg)
5233 return Reg;
5234 report_fatal_error(Twine("Invalid register name \""
5235 + StringRef(RegName) + "\"."));
5236}
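// [Editor's sketch] A hedged front-end example of what ends up in
// getRegisterByName: Clang lowers a GNU global named-register variable to the
// llvm.read_register intrinsic. "sp" is always accepted above; x1-x7, x18 and
// x20 additionally require the register to be reserved (e.g. -ffixed-x18),
// otherwise the lowering reports a fatal error.
register unsigned long stack_pointer_reg asm("sp");
unsigned long read_stack_pointer() { return stack_pointer_reg; }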
5237
5238SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
5239 SelectionDAG &DAG) const {
5240 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
5241
5242 EVT VT = Op.getValueType();
5243 SDLoc DL(Op);
5244
5245 SDValue FrameAddr =
5246 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5247 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5248
5249 return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
5250}
5251
5252SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
5253 SelectionDAG &DAG) const {
5254 MachineFunction &MF = DAG.getMachineFunction();
5255 MachineFrameInfo &MFI = MF.getFrameInfo();
5256 MFI.setReturnAddressIsTaken(true);
5257
5258 EVT VT = Op.getValueType();
5259 SDLoc DL(Op);
5260 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5261 if (Depth) {
5262 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5263 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5264 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
5265 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
5266 MachinePointerInfo());
5267 }
5268
5269 // Return LR, which contains the return address. Mark it an implicit live-in.
5270 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
5271 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
5272}
5273
5274 /// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
5275 /// i64 values and take a 2 x i64 value to shift plus a shift amount.
5276SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
5277 SelectionDAG &DAG) const {
5278 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5279 EVT VT = Op.getValueType();
5280 unsigned VTBits = VT.getSizeInBits();
5281 SDLoc dl(Op);
5282 SDValue ShOpLo = Op.getOperand(0);
5283 SDValue ShOpHi = Op.getOperand(1);
5284 SDValue ShAmt = Op.getOperand(2);
5285 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5286
5287 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5288
5289 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5290 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5291 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5292
5293 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
5294 // is "undef". We wanted 0, so CSEL it directly.
5295 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5296 ISD::SETEQ, dl, DAG);
5297 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5298 HiBitsForLo =
5299 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5300 HiBitsForLo, CCVal, Cmp);
5301
5302 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5303 DAG.getConstant(VTBits, dl, MVT::i64));
5304
5305 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5306 SDValue LoForNormalShift =
5307 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
5308
5309 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5310 dl, DAG);
5311 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5312 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5313 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5314 LoForNormalShift, CCVal, Cmp);
5315
5316 // AArch64 shifts larger than the register width are wrapped rather than
5317 // clamped, so we can't just emit "hi >> x".
5318 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5319 SDValue HiForBigShift =
5320 Opc == ISD::SRA
5321 ? DAG.getNode(Opc, dl, VT, ShOpHi,
5322 DAG.getConstant(VTBits - 1, dl, MVT::i64))
5323 : DAG.getConstant(0, dl, VT);
5324 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5325 HiForNormalShift, CCVal, Cmp);
5326
5327 SDValue Ops[2] = { Lo, Hi };
5328 return DAG.getMergeValues(Ops, dl);
5329}
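// [Editor's sketch] The branch-free CSEL network above computes what the
// following (branchy) C does for the low half of a 128-bit logical right
// shift; it exists precisely because "lo >> amt" and "hi << (64 - amt)" are
// not directly usable when amt is 0 or >= 64. Assumes 0 <= amt < 128.
static uint64_t srl_parts_lo(uint64_t lo, uint64_t hi, unsigned amt) {
  if (amt == 0)
    return lo;                 // the first CSEL forces HiBitsForLo to 0 here
  if (amt >= 64)
    return hi >> (amt - 64);   // "big shift" path, selected on the GE compare
  return (lo >> amt) | (hi << (64 - amt));
}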
5330
5331 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5332 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
5333SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
5334 SelectionDAG &DAG) const {
5335 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5336 EVT VT = Op.getValueType();
5337 unsigned VTBits = VT.getSizeInBits();
5338 SDLoc dl(Op);
5339 SDValue ShOpLo = Op.getOperand(0);
5340 SDValue ShOpHi = Op.getOperand(1);
5341 SDValue ShAmt = Op.getOperand(2);
5342
5343 assert(Op.getOpcode() == ISD::SHL_PARTS);
5344 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5345 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5346 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5347
5348 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
5349 // is "undef". We wanted 0, so CSEL it directly.
5350 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5351 ISD::SETEQ, dl, DAG);
5352 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5353 LoBitsForHi =
5354 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5355 LoBitsForHi, CCVal, Cmp);
5356
5357 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5358 DAG.getConstant(VTBits, dl, MVT::i64));
5359 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5360 SDValue HiForNormalShift =
5361 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
5362
5363 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5364
5365 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5366 dl, DAG);
5367 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5368 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5369 HiForNormalShift, CCVal, Cmp);
5370
5371 // AArch64 shifts by more than the register width are wrapped rather than
5372 // clamped, so we can't just emit "lo << a" if a is too big.
5373 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
5374 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5375 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5376 LoForNormalShift, CCVal, Cmp);
5377
5378 SDValue Ops[2] = { Lo, Hi };
5379 return DAG.getMergeValues(Ops, dl);
5380}
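// [Editor's sketch] Mirror image of the sketch after LowerShiftRightParts, for
// the high half of a 128-bit left shift. Assumes 0 <= amt < 128.
static uint64_t shl_parts_hi(uint64_t lo, uint64_t hi, unsigned amt) {
  if (amt == 0)
    return hi;                 // the CSEL forces LoBitsForHi to 0 here
  if (amt >= 64)
    return lo << (amt - 64);   // "big shift" path, selected on the GE compare
  return (hi << amt) | (lo >> (64 - amt));
}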
5381
5382bool AArch64TargetLowering::isOffsetFoldingLegal(
5383 const GlobalAddressSDNode *GA) const {
5384 // Offsets are folded in the DAG combine rather than here so that we can
5385 // intelligently choose an offset based on the uses.
5386 return false;
5387}
5388
5389bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5390 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit and (with full fp16) 16-bit cases.
5391 // FIXME: We should be able to handle f128 as well with a clever lowering.
5392 if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
5393 (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
5394 LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
5395 return true;
5396 }
5397
5398 bool IsLegal = false;
5399 SmallString<128> ImmStrVal;
5400 Imm.toString(ImmStrVal);
5401
5402 if (VT == MVT::f64)
5403 IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
5404 else if (VT == MVT::f32)
5405 IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
5406 else if (VT == MVT::f16 && Subtarget->hasFullFP16())
5407 IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
5408
5409 if (IsLegal) {
5410 LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
5411                   << " imm value: " << ImmStrVal << "\n");
5412 return true;
5413 }
5414
5415 LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
5416                   << " imm value: " << ImmStrVal << "\n");
5417 return false;
5418}
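// [Editor's note] Illustrative values only (assumed from the architecture's
// 8-bit FMOV immediate form, (+/-)(16..31)/16 * 2^r with r in [-3, 4], not
// exhaustive): the first row below would report as legal here, the second
// would not.
static const double kFMOVEncodable[]    = { 0.25, 0.5, 1.0, 2.0, 3.875, -1.0 };
static const double kNotFMOVEncodable[] = { 0.1, 1e10, 0.0078125 /* 2^-7 */ };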
5419
5420//===----------------------------------------------------------------------===//
5421// AArch64 Optimization Hooks
5422//===----------------------------------------------------------------------===//
5423
5424static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
5425 SDValue Operand, SelectionDAG &DAG,
5426 int &ExtraSteps) {
5427 EVT VT = Operand.getValueType();
5428 if (ST->hasNEON() &&
5429 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
5430 VT == MVT::f32 || VT == MVT::v1f32 ||
5431 VT == MVT::v2f32 || VT == MVT::v4f32)) {
5432 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
5433 // For the reciprocal estimates, convergence is quadratic, so the number
5434 // of digits is doubled after each iteration. In ARMv8, the accuracy of
5435 // the initial estimate is 2^-8. Thus the number of extra steps to refine
5436 // the result for float (23 mantissa bits) is 2 and for double (52
5437 // mantissa bits) is 3.
5438 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
5439
5440 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
5441 }
5442
5443 return SDValue();
5444}
5445
5446SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
5447 SelectionDAG &DAG, int Enabled,
5448 int &ExtraSteps,
5449 bool &UseOneConst,
5450 bool Reciprocal) const {
5451 if (Enabled == ReciprocalEstimate::Enabled ||
5452 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5453 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5454 DAG, ExtraSteps)) {
5455 SDLoc DL(Operand);
5456 EVT VT = Operand.getValueType();
5457
5458 SDNodeFlags Flags;
5459 Flags.setAllowReassociation(true);
5460
5461 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
5462 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
5463 for (int i = ExtraSteps; i > 0; --i) {
5464 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
5465 Flags);
5466 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
5467 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5468 }
5469 if (!Reciprocal) {
5470 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5471 VT);
5472 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5473 SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5474
5475 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5476 // Correct the result if the operand is 0.0.
5477 Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5478 VT, Eq, Operand, Estimate);
5479 }
5480
5481 ExtraSteps = 0;
5482 return Estimate;
5483 }
5484
5485 return SDValue();
5486}
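// [Editor's sketch] Scalar model of the refinement loop built above: starting
// from the FRSQRTE estimate E of 1/sqrt(x), each step computes
// E' = E * (0.5 * (3 - x * E * E)), where FRSQRTS supplies the 0.5 * (3 - M * N)
// part. With a 2^-8-accurate seed, 2 steps suffice for float and 3 for double,
// matching the ExtraSteps values chosen in getEstimate above.
float newton_rsqrt_refine(float x, float estimate, int steps) {
  for (int i = 0; i < steps; ++i)
    estimate = estimate * (0.5f * (3.0f - x * estimate * estimate));
  return estimate;
}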
5487
5488SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5489 SelectionDAG &DAG, int Enabled,
5490 int &ExtraSteps) const {
5491 if (Enabled == ReciprocalEstimate::Enabled)
5492 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5493 DAG, ExtraSteps)) {
5494 SDLoc DL(Operand);
5495 EVT VT = Operand.getValueType();
5496
5497 SDNodeFlags Flags;
5498 Flags.setAllowReassociation(true);
5499
5500 // Newton reciprocal iteration: E * (2 - X * E)
5501 // AArch64 reciprocal iteration instruction: (2 - M * N)
5502 for (int i = ExtraSteps; i > 0; --i) {
5503 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5504 Estimate, Flags);
5505 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5506 }
5507
5508 ExtraSteps = 0;
5509 return Estimate;
5510 }
5511
5512 return SDValue();
5513}
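// [Editor's sketch] Same idea for the reciprocal: FRECPE seeds E ~ 1/x and each
// FRECPS/FMUL pair above computes E' = E * (2 - x * E).
float newton_recip_refine(float x, float estimate, int steps) {
  for (int i = 0; i < steps; ++i)
    estimate = estimate * (2.0f - x * estimate);
  return estimate;
}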
5514
5515//===----------------------------------------------------------------------===//
5516// AArch64 Inline Assembly Support
5517//===----------------------------------------------------------------------===//
5518
5519// Table of Constraints
5520 // TODO: This is the current set of constraints supported by ARM for the
5521 // compiler; not all of them may make sense.
5522//
5523// r - A general register
5524// w - An FP/SIMD register of some size in the range v0-v31
5525// x - An FP/SIMD register of some size in the range v0-v15
5526// I - Constant that can be used with an ADD instruction
5527// J - Constant that can be used with a SUB instruction
5528// K - Constant that can be used with a 32-bit logical instruction
5529// L - Constant that can be used with a 64-bit logical instruction
5530// M - Constant that can be used as a 32-bit MOV immediate
5531// N - Constant that can be used as a 64-bit MOV immediate
5532// Q - A memory reference with base register and no offset
5533// S - A symbolic address
5534// Y - Floating point constant zero
5535// Z - Integer constant zero
5536//
5537// Note that general register operands will be output using their 64-bit x
5538// register name, whatever the size of the variable, unless the asm operand
5539// is prefixed by the %w modifier. Floating-point and SIMD register operands
5540// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5541// %q modifier.
5542const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5543 // At this point, we have to lower this constraint to something else, so we
5544 // lower it to an "r" or "w". However, by doing this we will force the result
5545 // to be in register, while the X constraint is much more permissive.
5546 //
5547 // Although we are correct (we are free to emit anything, without
5548 // constraints), we might break use cases that would expect us to be more
5549 // efficient and emit something else.
5550 if (!Subtarget->hasFPARMv8())
5551 return "r";
5552
5553 if (ConstraintVT.isFloatingPoint())
5554 return "w";
5555
5556 if (ConstraintVT.isVector() &&
5557 (ConstraintVT.getSizeInBits() == 64 ||
5558 ConstraintVT.getSizeInBits() == 128))
5559 return "w";
5560
5561 return "r";
5562}
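// [Editor's sketch] Hedged inline-asm usage of a few constraints from the table
// above ('r' general register, 'w' FP/SIMD register, 'I' ADD-immediate); the %s
// operand modifier mentioned above selects the single-precision register name.
long add_const(long x) {
  long r;
  asm("add %0, %1, %2" : "=r"(r) : "r"(x), "I"(4095));
  return r;
}
float negate(float x) {
  float r;
  asm("fneg %s0, %s1" : "=w"(r) : "w"(x));
  return r;
}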
5563
5564/// getConstraintType - Given a constraint letter, return the type of
5565/// constraint it is for this target.
5566AArch64TargetLowering::ConstraintType
5567AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5568 if (Constraint.size() == 1) {
5569 switch (Constraint[0]) {
5570 default:
5571 break;
5572 case 'z':
5573 return C_Other;
5574 case 'x':
5575 case 'w':
5576 return C_RegisterClass;
5577 // An address with a single base register. Due to the way we
5578 // currently handle addresses it is the same as 'r'.
5579 case 'Q':
5580 return C_Memory;
5581 case 'S': // A symbolic address
5582 return C_Other;
5583 }
5584 }
5585 return TargetLowering::getConstraintType(Constraint);
5586}
5587
5588/// Examine constraint type and operand type and determine a weight value.
5589/// This object must already have been set up with the operand type
5590/// and the current alternative constraint selected.
5591TargetLowering::ConstraintWeight
5592AArch64TargetLowering::getSingleConstraintMatchWeight(
5593 AsmOperandInfo &info, const char *constraint) const {
5594 ConstraintWeight weight = CW_Invalid;
5595 Value *CallOperandVal = info.CallOperandVal;
5596 // If we don't have a value, we can't do a match,
5597 // but allow it at the lowest weight.
5598 if (!CallOperandVal)
5599 return CW_Default;
5600 Type *type = CallOperandVal->getType();
5601 // Look at the constraint type.
5602 switch (*constraint) {
5603 default:
5604 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5605 break;
5606 case 'x':
5607 case 'w':
5608 if (type->isFloatingPointTy() || type->isVectorTy())
5609 weight = CW_Register;
5610 break;
5611 case 'z':
5612 weight = CW_Constant;
5613 break;
5614 }
5615 return weight;
5616}
5617
5618std::pair<unsigned, const TargetRegisterClass *>
5619AArch64TargetLowering::getRegForInlineAsmConstraint(
5620 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5621 if (Constraint.size() == 1) {
5622 switch (Constraint[0]) {
5623 case 'r':
5624 if (VT.getSizeInBits() == 64)
5625 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5626 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5627 case 'w':
5628 if (!Subtarget->hasFPARMv8())
5629 break;
5630 if (VT.getSizeInBits() == 16)
5631 return std::make_pair(0U, &AArch64::FPR16RegClass);
5632 if (VT.getSizeInBits() == 32)
5633 return std::make_pair(0U, &AArch64::FPR32RegClass);
5634 if (VT.getSizeInBits() == 64)
5635 return std::make_pair(0U, &AArch64::FPR64RegClass);
5636 if (VT.getSizeInBits() == 128)
5637 return std::make_pair(0U, &AArch64::FPR128RegClass);
5638 break;
5639 // The instructions that this constraint is designed for can
5640 // only take 128-bit registers so just use that regclass.
5641 case 'x':
5642 if (!Subtarget->hasFPARMv8())
5643 break;
5644 if (VT.getSizeInBits() == 128)
5645 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5646 break;
5647 }
5648 }
5649 if (StringRef("{cc}").equals_lower(Constraint))
5650 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5651
5652 // Use the default implementation in TargetLowering to convert the register
5653 // constraint into a member of a register class.
5654 std::pair<unsigned, const TargetRegisterClass *> Res;
5655 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5656
5657 // Not found as a standard register?
5658 if (!Res.second) {
5659 unsigned Size = Constraint.size();
5660 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5661 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5662 int RegNo;
5663 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5664 if (!Failed && RegNo >= 0 && RegNo <= 31) {
5665 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5666 // By default we'll emit v0-v31 for this unless there's a modifier, in
5667 // which case the correctly sized register is emitted.
5668 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5669 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5670 Res.second = &AArch64::FPR64RegClass;
5671 } else {
5672 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5673 Res.second = &AArch64::FPR128RegClass;
5674 }
5675 }
5676 }
5677 }
5678
5679 if (Res.second && !Subtarget->hasFPARMv8() &&
5680 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
5681 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
5682 return std::make_pair(0U, nullptr);
5683
5684 return Res;
5685}
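// [Editor's sketch] Hedged example of how the 'w' class choice above depends on
// the operand's width: a 64-bit vector picks FPR64 (d-registers), a 128-bit
// vector picks FPR128 (q-registers). The ACLE <arm_neon.h> types are used
// purely for illustration.
#include <arm_neon.h>
int32x2_t pass_through_d(int32x2_t v) {
  asm("" : "+w"(v));   // VT is 64 bits  -> FPR64RegClass above
  return v;
}
int32x4_t pass_through_q(int32x4_t v) {
  asm("" : "+w"(v));   // VT is 128 bits -> FPR128RegClass above
  return v;
}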
5686
5687/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5688/// vector. If it is invalid, don't add anything to Ops.
5689void AArch64TargetLowering::LowerAsmOperandForConstraint(
5690 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5691 SelectionDAG &DAG) const {
5692 SDValue Result;
5693
5694 // Currently only support length 1 constraints.
5695 if (Constraint.length() != 1)
5696 return;
5697
5698 char ConstraintLetter = Constraint[0];
5699 switch (ConstraintLetter) {
5700 default:
5701 break;
5702
5703 // This set of constraints deals with valid constants for various instructions.
5704 // Validate and return a target constant for them if we can.
5705 case 'z': {
5706 // 'z' maps to xzr or wzr so it needs an input of 0.
5707 if (!isNullConstant(Op))
5708 return;
5709
5710 if (Op.getValueType() == MVT::i64)
5711 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5712 else
5713 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5714 break;
5715 }
5716 case 'S': {
5717 // An absolute symbolic address or label reference.
5718 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5719 Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5720 GA->getValueType(0));
5721 } else if (const BlockAddressSDNode *BA =
5722 dyn_cast<BlockAddressSDNode>(Op)) {
5723 Result =
5724 DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
5725 } else if (const ExternalSymbolSDNode *ES =
5726 dyn_cast<ExternalSymbolSDNode>(Op)) {
5727 Result =
5728 DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0));
5729 } else
5730 return;
5731 break;
5732 }
5733
5734 case 'I':
5735 case 'J':
5736 case 'K':
5737 case 'L':
5738 case 'M':
5739 case 'N':
5740 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5741 if (!C)
5742 return;
5743
5744 // Grab the value and do some validation.
5745 uint64_t CVal = C->getZExtValue();
5746 switch (ConstraintLetter) {
5747 // The I constraint applies only to simple ADD or SUB immediate operands:
5748 // i.e. 0 to 4095 with optional shift by 12
5749 // The J constraint applies only to ADD or SUB immediates that would be
5750 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5751 // instruction [or vice versa], in other words -1 to -4095 with optional
5752 // left shift by 12.
5753 case 'I':
5754 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5755 break;
5756 return;
5757 case 'J': {
5758 uint64_t NVal = -C->getSExtValue();
5759 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5760 CVal = C->getSExtValue();
5761 break;
5762 }
5763 return;
5764 }
5765 // The K and L constraints apply *only* to logical immediates, including
5766 // what used to be the MOVI alias for ORR (though the MOVI alias has now
5767 // been removed and MOV should be used). So these constraints have to
5768 // distinguish between bit patterns that are valid 32-bit or 64-bit
5769 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5770 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5771 // versa.
5772 case 'K':
5773 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5774 break;
5775 return;
5776 case 'L':
5777 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5778 break;
5779 return;
5780 // The M and N constraints are a superset of K and L respectively, for use
5781 // with the MOV (immediate) alias. As well as the logical immediates they
5782 // also match 32- or 64-bit immediates that can be loaded using a *single*
5783 // MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca (M) or
5784 // 64-bit 0x1234000000000000 (N), etc.
5785 // As a note, some of this code is liberally stolen from the asm parser.
5786 case 'M': {
5787 if (!isUInt<32>(CVal))
5788 return;
5789 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5790 break;
5791 if ((CVal & 0xFFFF) == CVal)
5792 break;
5793 if ((CVal & 0xFFFF0000ULL) == CVal)
5794 break;
5795 uint64_t NCVal = ~(uint32_t)CVal;
5796 if ((NCVal & 0xFFFFULL) == NCVal)
5797 break;
5798 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5799 break;
5800 return;
5801 }
5802 case 'N': {
5803 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5804 break;
5805 if ((CVal & 0xFFFFULL) == CVal)
5806 break;
5807 if ((CVal & 0xFFFF0000ULL) == CVal)
5808 break;
5809 if ((CVal & 0xFFFF00000000ULL) == CVal)
5810 break;
5811 if ((CVal & 0xFFFF000000000000ULL) == CVal)
5812 break;
5813 uint64_t NCVal = ~CVal;
5814 if ((NCVal & 0xFFFFULL) == NCVal)
5815 break;
5816 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5817 break;
5818 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5819 break;
5820 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5821 break;
5822 return;
5823 }
5824 default:
5825 return;
5826 }
5827
5828 // All assembler immediates are 64-bit integers.
5829 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5830 break;
5831 }
5832
5833 if (Result.getNode()) {
5834 Ops.push_back(Result);
5835 return;
5836 }
5837
5838 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5839}
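// [Editor's sketch] Hedged summary of the immediate constraints validated
// above, with example values taken from the comments in the function:
//   'I' : 0 .. 4095, optionally shifted left by 12       e.g. 4095, 0xFFF000
//   'J' : ADD/SUB immediates valid when negated           e.g. -42
//   'K' : 32-bit logical (bitmask) immediate              e.g. 0xaaaaaaaa
//   'L' : 64-bit logical (bitmask) immediate              e.g. 0xaaaaaaaaaaaaaaaaULL
//   'M'/'N' : additionally, single-MOVZ/MOVN immediates   e.g. 0x12340000,
//             0x1234000000000000ULL
// A small, illustrative use of 'K':
unsigned keep_alternating_bits(unsigned x) {
  unsigned r;
  asm("and %w0, %w1, %2" : "=r"(r) : "r"(x), "K"(0xaaaaaaaa));
  return r;
}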
5840
5841//===----------------------------------------------------------------------===//
5842// AArch64 Advanced SIMD Support
5843//===----------------------------------------------------------------------===//
5844
5845/// WidenVector - Given a value in the V64 register class, produce the
5846/// equivalent value in the V128 register class.
5847static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5848 EVT VT = V64Reg.getValueType();
5849 unsigned NarrowSize = VT.getVectorNumElements();
5850 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5851 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5852 SDLoc DL(V64Reg);
5853
5854 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5855 V64Reg, DAG.getConstant(0, DL, MVT::i32));
5856}
5857
5858/// getExtFactor - Determine the adjustment factor for the position when
5859/// generating an "extract from vector registers" instruction.
5860static unsigned getExtFactor(SDValue &V) {
5861 EVT EltType = V.getValueType().getVectorElementType();
5862 return EltType.getSizeInBits() / 8;
5863}
5864
5865/// NarrowVector - Given a value in the V128 register class, produce the
5866/// equivalent value in the V64 register class.
5867static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5868 EVT VT = V128Reg.getValueType();
5869 unsigned WideSize = VT.getVectorNumElements();
5870 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5871 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5872 SDLoc DL(V128Reg);
5873
5874 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5875}
5876
5877// Gather data to see if the operation can be modelled as a
5878// shuffle in combination with VEXTs.
5879SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5880 SelectionDAG &DAG) const {
5881 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5882 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5883 SDLoc dl(Op);
5884 EVT VT = Op.getValueType();
5885 unsigned NumElts = VT.getVectorNumElements();
5886
5887 struct ShuffleSourceInfo {
5888 SDValue Vec;
5889 unsigned MinElt;
5890 unsigned MaxElt;
5891
5892 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5893 // be compatible with the shuffle we intend to construct. As a result
5894 // ShuffleVec will be some sliding window into the original Vec.
5895 SDValue ShuffleVec;
5896
5897 // Code should guarantee that element i in Vec starts at element
5898 // "WindowBase + i * WindowScale" in ShuffleVec.
5899 int WindowBase;
5900 int WindowScale;
5901
5902 ShuffleSourceInfo(SDValue Vec)
5903 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5904 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5905
5906 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5907 };
5908
5909 // First gather all vectors used as an immediate source for this BUILD_VECTOR
5910 // node.
5911 SmallVector<ShuffleSourceInfo, 2> Sources;
5912 for (unsigned i = 0; i < NumElts; ++i) {
5913 SDValue V = Op.getOperand(i);
5914 if (V.isUndef())
5915 continue;
5916 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5917 !isa<ConstantSDNode>(V.getOperand(1))) {
5918 LLVM_DEBUG(
5919     dbgs() << "Reshuffle failed: "
5920               "a shuffle can only come from building a vector from "
5921               "various elements of other vectors, provided their "
5922               "indices are constant\n");
5923 return SDValue();
5924 }
5925
5926 // Add this element source to the list if it's not already there.
5927 SDValue SourceVec = V.getOperand(0);
5928 auto Source = find(Sources, SourceVec);
5929 if (Source == Sources.end())
5930 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5931
5932 // Update the minimum and maximum lane number seen.
5933 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5934 Source->MinElt = std::min(Source->MinElt, EltNo);
5935 Source->MaxElt = std::max(Source->MaxElt, EltNo);
5936 }
5937
5938 if (Sources.size() > 2) {
5939 LLVM_DEBUG(
5940     dbgs() << "Reshuffle failed: currently only do something sane when at "
5941               "most two source vectors are involved\n");
5942 return SDValue();
5943 }
5944
5945 // Find the smallest element size among the result and the two sources, and
5946 // use it as the element size to build the shuffle_vector.
5947 EVT SmallestEltTy = VT.getVectorElementType();
5948 for (auto &Source : Sources) {
5949 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5950 if (SrcEltTy.bitsLT(SmallestEltTy)) {
5951 SmallestEltTy = SrcEltTy;
5952 }
5953 }
5954 unsigned ResMultiplier =
5955 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5956 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5957 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5958
5959