Bug Summary

File: lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 8780, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
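For context, shifting an integer by an amount greater than or equal to its bit width is undefined behavior in C++, which is exactly what the analyzer is flagging here. A minimal sketch of the flagged pattern and a guarded alternative, assuming a runtime shift count that can reach 64 as at line 8780 (the function and variable names below are hypothetical, not taken from the LLVM source):

#include <cstdint>

uint64_t mask_low_bits(unsigned NumBits) {
  // Undefined behavior when NumBits == 64: the shift amount equals the
  // width of 'unsigned long long' / uint64_t.
  //   return (1ULL << NumBits) - 1;

  // Guarded version: handle the full-width case explicitly.
  if (NumBits >= 64)
    return ~0ULL;
  return (1ULL << NumBits) - 1;
}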

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-8/lib/clang/8.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/include -I /build/llvm-toolchain-snapshot-8~svn345461/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/include/clang/8.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-8/lib/clang/8.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-8~svn345461/build-llvm/lib/Target/AArch64 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-10-27-211344-32123-1 -x c++ /build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64ISelLowering.cpp -faddrsig

/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AArch64TargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64ISelLowering.h"
15#include "AArch64CallingConvention.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGNodes.h"
44#include "llvm/CodeGen/TargetCallingConv.h"
45#include "llvm/CodeGen/TargetInstrInfo.h"
46#include "llvm/CodeGen/ValueTypes.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/GetElementPtrTypeIterator.h"
54#include "llvm/IR/GlobalValue.h"
55#include "llvm/IR/IRBuilder.h"
56#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Instructions.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/OperandTraits.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/Value.h"
64#include "llvm/MC/MCRegisterInfo.h"
65#include "llvm/Support/Casting.h"
66#include "llvm/Support/CodeGen.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/Debug.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Support/KnownBits.h"
72#include "llvm/Support/MachineValueType.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Target/TargetMachine.h"
76#include "llvm/Target/TargetOptions.h"
77#include <algorithm>
78#include <bitset>
79#include <cassert>
80#include <cctype>
81#include <cstdint>
82#include <cstdlib>
83#include <iterator>
84#include <limits>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "aarch64-lower"
92
93STATISTIC(NumTailCalls, "Number of tail calls");
94STATISTIC(NumShiftInserts, "Number of vector shift inserts");
95STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96
97static cl::opt<bool>
98EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99 cl::desc("Allow AArch64 SLI/SRI formation"),
100 cl::init(false));
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116/// Value type used for condition codes.
117static const MVT MVT_CC = MVT::i32;
118
119AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
120 const AArch64Subtarget &STI)
121 : TargetLowering(TM), Subtarget(&STI) {
122 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123 // we have to make something up. Arbitrarily, choose ZeroOrOne.
124 setBooleanContents(ZeroOrOneBooleanContent);
125 // When comparing vectors the result sets the different elements in the
126 // vector to all-one or all-zero.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // Set up the register classes.
130 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132
133 if (Subtarget->hasFPARMv8()) {
134 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138 }
139
140 if (Subtarget->hasNEON()) {
141 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143 // Someone set us up the NEON.
144 addDRTypeForNEON(MVT::v2f32);
145 addDRTypeForNEON(MVT::v8i8);
146 addDRTypeForNEON(MVT::v4i16);
147 addDRTypeForNEON(MVT::v2i32);
148 addDRTypeForNEON(MVT::v1i64);
149 addDRTypeForNEON(MVT::v1f64);
150 addDRTypeForNEON(MVT::v4f16);
151
152 addQRTypeForNEON(MVT::v4f32);
153 addQRTypeForNEON(MVT::v2f64);
154 addQRTypeForNEON(MVT::v16i8);
155 addQRTypeForNEON(MVT::v8i16);
156 addQRTypeForNEON(MVT::v4i32);
157 addQRTypeForNEON(MVT::v2i64);
158 addQRTypeForNEON(MVT::v8f16);
159 }
160
161 // Compute derived properties from the register classes
162 computeRegisterProperties(Subtarget->getRegisterInfo());
163
164 // Provide all sorts of operation actions
165 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
166 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
167 setOperationAction(ISD::SETCC, MVT::i32, Custom);
168 setOperationAction(ISD::SETCC, MVT::i64, Custom);
169 setOperationAction(ISD::SETCC, MVT::f16, Custom);
170 setOperationAction(ISD::SETCC, MVT::f32, Custom);
171 setOperationAction(ISD::SETCC, MVT::f64, Custom);
172 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
173 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
174 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
175 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
176 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
177 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
178 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
179 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
180 setOperationAction(ISD::SELECT, MVT::i32, Custom);
181 setOperationAction(ISD::SELECT, MVT::i64, Custom);
182 setOperationAction(ISD::SELECT, MVT::f16, Custom);
183 setOperationAction(ISD::SELECT, MVT::f32, Custom);
184 setOperationAction(ISD::SELECT, MVT::f64, Custom);
185 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
186 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
188 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
189 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
190 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
191 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
192
193 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
194 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
195 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
196
197 setOperationAction(ISD::FREM, MVT::f32, Expand);
198 setOperationAction(ISD::FREM, MVT::f64, Expand);
199 setOperationAction(ISD::FREM, MVT::f80, Expand);
200
201 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
202
203 // Custom lowering hooks are needed for XOR
204 // to fold it into CSINC/CSINV.
205 setOperationAction(ISD::XOR, MVT::i32, Custom);
206 setOperationAction(ISD::XOR, MVT::i64, Custom);
207
208 // Virtually no operation on f128 is legal, but LLVM can't expand them when
209 // there's a valid register class, so we need custom operations in most cases.
210 setOperationAction(ISD::FABS, MVT::f128, Expand);
211 setOperationAction(ISD::FADD, MVT::f128, Custom);
212 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
213 setOperationAction(ISD::FCOS, MVT::f128, Expand);
214 setOperationAction(ISD::FDIV, MVT::f128, Custom);
215 setOperationAction(ISD::FMA, MVT::f128, Expand);
216 setOperationAction(ISD::FMUL, MVT::f128, Custom);
217 setOperationAction(ISD::FNEG, MVT::f128, Expand);
218 setOperationAction(ISD::FPOW, MVT::f128, Expand);
219 setOperationAction(ISD::FREM, MVT::f128, Expand);
220 setOperationAction(ISD::FRINT, MVT::f128, Expand);
221 setOperationAction(ISD::FSIN, MVT::f128, Expand);
222 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
223 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
224 setOperationAction(ISD::FSUB, MVT::f128, Custom);
225 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
226 setOperationAction(ISD::SETCC, MVT::f128, Custom);
227 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
228 setOperationAction(ISD::SELECT, MVT::f128, Custom);
229 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
230 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
231
232 // Lowering for many of the conversions is actually specified by the non-f128
233 // type. The LowerXXX function will be trivial when f128 isn't involved.
234 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
235 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
236 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
237 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
238 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
239 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
241 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
243 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
244 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
245 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
246 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
247 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
248
249 // Variable arguments.
250 setOperationAction(ISD::VASTART, MVT::Other, Custom);
251 setOperationAction(ISD::VAARG, MVT::Other, Custom);
252 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
253 setOperationAction(ISD::VAEND, MVT::Other, Expand);
254
255 // Variable-sized objects.
256 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
257 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
258
259 if (Subtarget->isTargetWindows())
260 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
261 else
262 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
263
264 // Constant pool entries
265 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
266
267 // BlockAddress
268 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
269
270 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
271 setOperationAction(ISD::ADDC, MVT::i32, Custom);
272 setOperationAction(ISD::ADDE, MVT::i32, Custom);
273 setOperationAction(ISD::SUBC, MVT::i32, Custom);
274 setOperationAction(ISD::SUBE, MVT::i32, Custom);
275 setOperationAction(ISD::ADDC, MVT::i64, Custom);
276 setOperationAction(ISD::ADDE, MVT::i64, Custom);
277 setOperationAction(ISD::SUBC, MVT::i64, Custom);
278 setOperationAction(ISD::SUBE, MVT::i64, Custom);
279
280 // AArch64 lacks both left-rotate and popcount instructions.
281 setOperationAction(ISD::ROTL, MVT::i32, Expand);
282 setOperationAction(ISD::ROTL, MVT::i64, Expand);
283 for (MVT VT : MVT::vector_valuetypes()) {
284 setOperationAction(ISD::ROTL, VT, Expand);
285 setOperationAction(ISD::ROTR, VT, Expand);
286 }
287
288 // AArch64 doesn't have {U|S}MUL_LOHI.
289 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
290 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
291
292 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
293 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
294
295 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
296 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
297 for (MVT VT : MVT::vector_valuetypes()) {
298 setOperationAction(ISD::SDIVREM, VT, Expand);
299 setOperationAction(ISD::UDIVREM, VT, Expand);
300 }
301 setOperationAction(ISD::SREM, MVT::i32, Expand);
302 setOperationAction(ISD::SREM, MVT::i64, Expand);
303 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
304 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
305 setOperationAction(ISD::UREM, MVT::i32, Expand);
306 setOperationAction(ISD::UREM, MVT::i64, Expand);
307
308 // Custom lower Add/Sub/Mul with overflow.
309 setOperationAction(ISD::SADDO, MVT::i32, Custom);
310 setOperationAction(ISD::SADDO, MVT::i64, Custom);
311 setOperationAction(ISD::UADDO, MVT::i32, Custom);
312 setOperationAction(ISD::UADDO, MVT::i64, Custom);
313 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
314 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
315 setOperationAction(ISD::USUBO, MVT::i32, Custom);
316 setOperationAction(ISD::USUBO, MVT::i64, Custom);
317 setOperationAction(ISD::SMULO, MVT::i32, Custom);
318 setOperationAction(ISD::SMULO, MVT::i64, Custom);
319 setOperationAction(ISD::UMULO, MVT::i32, Custom);
320 setOperationAction(ISD::UMULO, MVT::i64, Custom);
321
322 setOperationAction(ISD::FSIN, MVT::f32, Expand);
323 setOperationAction(ISD::FSIN, MVT::f64, Expand);
324 setOperationAction(ISD::FCOS, MVT::f32, Expand);
325 setOperationAction(ISD::FCOS, MVT::f64, Expand);
326 setOperationAction(ISD::FPOW, MVT::f32, Expand);
327 setOperationAction(ISD::FPOW, MVT::f64, Expand);
328 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
329 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
330 if (Subtarget->hasFullFP16())
331 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
332 else
333 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
334
335 setOperationAction(ISD::FREM, MVT::f16, Promote);
336 setOperationAction(ISD::FREM, MVT::v4f16, Promote);
337 setOperationAction(ISD::FREM, MVT::v8f16, Promote);
338 setOperationAction(ISD::FPOW, MVT::f16, Promote);
339 setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
340 setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
341 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
342 setOperationAction(ISD::FCOS, MVT::f16, Promote);
343 setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
344 setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
345 setOperationAction(ISD::FSIN, MVT::f16, Promote);
346 setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
347 setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
348 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
349 setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
350 setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
351 setOperationAction(ISD::FEXP, MVT::f16, Promote);
352 setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
353 setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
354 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
355 setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
356 setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
357 setOperationAction(ISD::FLOG, MVT::f16, Promote);
358 setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
359 setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
360 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
361 setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
362 setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
363 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
364 setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
365 setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
366
367 if (!Subtarget->hasFullFP16()) {
368 setOperationAction(ISD::SELECT, MVT::f16, Promote);
369 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
370 setOperationAction(ISD::SETCC, MVT::f16, Promote);
371 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
372 setOperationAction(ISD::FADD, MVT::f16, Promote);
373 setOperationAction(ISD::FSUB, MVT::f16, Promote);
374 setOperationAction(ISD::FMUL, MVT::f16, Promote);
375 setOperationAction(ISD::FDIV, MVT::f16, Promote);
376 setOperationAction(ISD::FMA, MVT::f16, Promote);
377 setOperationAction(ISD::FNEG, MVT::f16, Promote);
378 setOperationAction(ISD::FABS, MVT::f16, Promote);
379 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
380 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
381 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
382 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
383 setOperationAction(ISD::FRINT, MVT::f16, Promote);
384 setOperationAction(ISD::FROUND, MVT::f16, Promote);
385 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
386 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
387 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
388 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
389 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
390
391 // promote v4f16 to v4f32 when that is known to be safe.
392 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
393 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
394 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
395 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
396 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
397 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
398 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
399 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
400 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
401 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
402 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
403 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
404
405 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
406 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
407 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
408 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
409 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
410 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
411 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
412 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
413 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
414 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
415 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
416 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
417 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
418 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
419 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
420
421 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
422 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
423 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
424 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
425 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
426 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
427 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
428 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
429 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
430 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
431 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
432 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
433 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
434 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
435 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
436 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
437 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
438 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
439 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
440 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
441 }
442
443 // AArch64 has implementations of a lot of rounding-like FP operations.
444 for (MVT Ty : {MVT::f32, MVT::f64}) {
445 setOperationAction(ISD::FFLOOR, Ty, Legal);
446 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
447 setOperationAction(ISD::FCEIL, Ty, Legal);
448 setOperationAction(ISD::FRINT, Ty, Legal);
449 setOperationAction(ISD::FTRUNC, Ty, Legal);
450 setOperationAction(ISD::FROUND, Ty, Legal);
451 setOperationAction(ISD::FMINNUM, Ty, Legal);
452 setOperationAction(ISD::FMAXNUM, Ty, Legal);
453 setOperationAction(ISD::FMINIMUM, Ty, Legal);
454 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
455 }
456
457 if (Subtarget->hasFullFP16()) {
458 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
459 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
460 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
461 setOperationAction(ISD::FRINT, MVT::f16, Legal);
462 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
463 setOperationAction(ISD::FROUND, MVT::f16, Legal);
464 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
465 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
466 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
467 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
468 }
469
470 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
471
472 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
473
474 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
477 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
478 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
479
480 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
481 // This requires the Performance Monitors extension.
482 if (Subtarget->hasPerfMon())
483 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
484
485 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
486 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
487 // Issue __sincos_stret if available.
488 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
489 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
490 } else {
491 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
492 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
493 }
494
495 // Make floating-point constants legal for the large code model, so they don't
496 // become loads from the constant pool.
497 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
498 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
499 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
500 }
501
502 // AArch64 does not have floating-point extending loads, i1 sign-extending
503 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
504 for (MVT VT : MVT::fp_valuetypes()) {
505 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
506 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
507 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
508 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
509 }
510 for (MVT VT : MVT::integer_valuetypes())
511 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
512
513 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
514 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
515 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
516 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
517 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
518 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
519 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
520
521 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
522 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
523
524 // Indexed loads and stores are supported.
525 for (unsigned im = (unsigned)ISD::PRE_INC;
526 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
527 setIndexedLoadAction(im, MVT::i8, Legal);
528 setIndexedLoadAction(im, MVT::i16, Legal);
529 setIndexedLoadAction(im, MVT::i32, Legal);
530 setIndexedLoadAction(im, MVT::i64, Legal);
531 setIndexedLoadAction(im, MVT::f64, Legal);
532 setIndexedLoadAction(im, MVT::f32, Legal);
533 setIndexedLoadAction(im, MVT::f16, Legal);
534 setIndexedStoreAction(im, MVT::i8, Legal);
535 setIndexedStoreAction(im, MVT::i16, Legal);
536 setIndexedStoreAction(im, MVT::i32, Legal);
537 setIndexedStoreAction(im, MVT::i64, Legal);
538 setIndexedStoreAction(im, MVT::f64, Legal);
539 setIndexedStoreAction(im, MVT::f32, Legal);
540 setIndexedStoreAction(im, MVT::f16, Legal);
541 }
542
543 // Trap.
544 setOperationAction(ISD::TRAP, MVT::Other, Legal);
545
546 // We combine OR nodes for bitfield operations.
547 setTargetDAGCombine(ISD::OR);
548
549 // Vector add and sub nodes may conceal a high-half opportunity.
550 // Also, try to fold ADD into CSINC/CSINV..
551 setTargetDAGCombine(ISD::ADD);
552 setTargetDAGCombine(ISD::SUB);
553 setTargetDAGCombine(ISD::SRL);
554 setTargetDAGCombine(ISD::XOR);
555 setTargetDAGCombine(ISD::SINT_TO_FP);
556 setTargetDAGCombine(ISD::UINT_TO_FP);
557
558 setTargetDAGCombine(ISD::FP_TO_SINT);
559 setTargetDAGCombine(ISD::FP_TO_UINT);
560 setTargetDAGCombine(ISD::FDIV);
561
562 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
563
564 setTargetDAGCombine(ISD::ANY_EXTEND);
565 setTargetDAGCombine(ISD::ZERO_EXTEND);
566 setTargetDAGCombine(ISD::SIGN_EXTEND);
567 setTargetDAGCombine(ISD::BITCAST);
568 setTargetDAGCombine(ISD::CONCAT_VECTORS);
569 setTargetDAGCombine(ISD::STORE);
570 if (Subtarget->supportsAddressTopByteIgnored())
571 setTargetDAGCombine(ISD::LOAD);
572
573 setTargetDAGCombine(ISD::MUL);
574
575 setTargetDAGCombine(ISD::SELECT);
576 setTargetDAGCombine(ISD::VSELECT);
577
578 setTargetDAGCombine(ISD::INTRINSIC_VOID);
579 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
580 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
581
582 setTargetDAGCombine(ISD::GlobalAddress);
583
584 // In case of strict alignment, avoid an excessive number of byte wide stores.
585 MaxStoresPerMemsetOptSize = 8;
586 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
587 ? MaxStoresPerMemsetOptSize : 32;
588
589 MaxGluedStoresPerMemcpy = 4;
590 MaxStoresPerMemcpyOptSize = 4;
591 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
592 ? MaxStoresPerMemcpyOptSize : 16;
593
594 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
595
596 setStackPointerRegisterToSaveRestore(AArch64::SP);
597
598 setSchedulingPreference(Sched::Hybrid);
599
600 EnableExtLdPromotion = true;
601
602 // Set required alignment.
603 setMinFunctionAlignment(2);
604 // Set preferred alignments.
605 setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
606 setPrefLoopAlignment(STI.getPrefLoopAlignment());
607
608 // Only change the limit for entries in a jump table if specified by
609 // the subtarget, but not at the command line.
610 unsigned MaxJT = STI.getMaximumJumpTableSize();
611 if (MaxJT && getMaximumJumpTableSize() == 0)
612 setMaximumJumpTableSize(MaxJT);
613
614 setHasExtractBitsInsn(true);
615
616 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
617
618 if (Subtarget->hasNEON()) {
619 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
620 // silliness like this:
621 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
622 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
623 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
624 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
625 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
626 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
627 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
628 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
629 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
630 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
631 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
632 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
633 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
634 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
635 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
636 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
637 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
638 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
639 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
640 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
641 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
642 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
643 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
644 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
645 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
646
647 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
648 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
649 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
650 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
651 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
652
653 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
654
655 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
656 // elements smaller than i32, so promote the input to i32 first.
657 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
658 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
659 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
660 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
661 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
662 // -> v8f16 conversions.
663 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
664 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
665 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
666 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
667 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
668 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
669 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
670 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
671 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
672 // Or, direct i32 -> f16 vector conversion. Set it to custom, so the
673 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
674 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
675 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
676
677 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
678 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
679
680 setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
681 setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
682 setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
683 setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
684 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
685 setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
686 setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
687 setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
688
689 // AArch64 doesn't have MUL.2d:
690 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
691 // Custom handling for some quad-vector types to detect MULL.
692 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
693 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
694 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
695
696 // Vector reductions
697 for (MVT VT : MVT::integer_valuetypes()) {
698 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
699 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
700 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
701 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
702 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
703 }
704 for (MVT VT : MVT::fp_valuetypes()) {
705 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
706 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
707 }
708
709 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
710 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
711 // Likewise, narrowing and extending vector loads/stores aren't handled
712 // directly.
713 for (MVT VT : MVT::vector_valuetypes()) {
714 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
715
716 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
717 setOperationAction(ISD::MULHS, VT, Custom);
718 setOperationAction(ISD::MULHU, VT, Custom);
719 } else {
720 setOperationAction(ISD::MULHS, VT, Expand);
721 setOperationAction(ISD::MULHU, VT, Expand);
722 }
723 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
724 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
725
726 setOperationAction(ISD::BSWAP, VT, Expand);
727
728 for (MVT InnerVT : MVT::vector_valuetypes()) {
729 setTruncStoreAction(VT, InnerVT, Expand);
730 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
731 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
732 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
733 }
734 }
735
736 // AArch64 has implementations of a lot of rounding-like FP operations.
737 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
738 setOperationAction(ISD::FFLOOR, Ty, Legal);
739 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
740 setOperationAction(ISD::FCEIL, Ty, Legal);
741 setOperationAction(ISD::FRINT, Ty, Legal);
742 setOperationAction(ISD::FTRUNC, Ty, Legal);
743 setOperationAction(ISD::FROUND, Ty, Legal);
744 }
745
746 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
747 }
748
749 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
750}
751
752void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
753 assert(VT.isVector() && "VT should be a vector type");
754
755 if (VT.isFloatingPoint()) {
756 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
757 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
758 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
759 }
760
761 // Mark vector float intrinsics as expand.
762 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
763 setOperationAction(ISD::FSIN, VT, Expand);
764 setOperationAction(ISD::FCOS, VT, Expand);
765 setOperationAction(ISD::FPOW, VT, Expand);
766 setOperationAction(ISD::FLOG, VT, Expand);
767 setOperationAction(ISD::FLOG2, VT, Expand);
768 setOperationAction(ISD::FLOG10, VT, Expand);
769 setOperationAction(ISD::FEXP, VT, Expand);
770 setOperationAction(ISD::FEXP2, VT, Expand);
771
772 // But we do support custom-lowering for FCOPYSIGN.
773 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
774 }
775
776 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
777 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
778 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
779 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
780 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
781 setOperationAction(ISD::SRA, VT, Custom);
782 setOperationAction(ISD::SRL, VT, Custom);
783 setOperationAction(ISD::SHL, VT, Custom);
784 setOperationAction(ISD::AND, VT, Custom);
785 setOperationAction(ISD::OR, VT, Custom);
786 setOperationAction(ISD::SETCC, VT, Custom);
787 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
788
789 setOperationAction(ISD::SELECT, VT, Expand);
790 setOperationAction(ISD::SELECT_CC, VT, Expand);
791 setOperationAction(ISD::VSELECT, VT, Expand);
792 for (MVT InnerVT : MVT::all_valuetypes())
793 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
794
795 // CNT supports only B element sizes, then use UADDLP to widen.
796 if (VT != MVT::v8i8 && VT != MVT::v16i8)
797 setOperationAction(ISD::CTPOP, VT, Custom);
798
799 setOperationAction(ISD::UDIV, VT, Expand);
800 setOperationAction(ISD::SDIV, VT, Expand);
801 setOperationAction(ISD::UREM, VT, Expand);
802 setOperationAction(ISD::SREM, VT, Expand);
803 setOperationAction(ISD::FREM, VT, Expand);
804
805 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
806 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
807
808 if (!VT.isFloatingPoint())
809 setOperationAction(ISD::ABS, VT, Legal);
810
811 // [SU][MIN|MAX] are available for all NEON types apart from i64.
812 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
813 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
814 setOperationAction(Opcode, VT, Legal);
815
816 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
817 if (VT.isFloatingPoint() &&
818 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
819 for (unsigned Opcode :
820 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
821 setOperationAction(Opcode, VT, Legal);
822
823 if (Subtarget->isLittleEndian()) {
824 for (unsigned im = (unsigned)ISD::PRE_INC;
825 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
826 setIndexedLoadAction(im, VT, Legal);
827 setIndexedStoreAction(im, VT, Legal);
828 }
829 }
830}
831
832void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
833 addRegisterClass(VT, &AArch64::FPR64RegClass);
834 addTypeForNEON(VT, MVT::v2i32);
835}
836
837void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
838 addRegisterClass(VT, &AArch64::FPR128RegClass);
839 addTypeForNEON(VT, MVT::v4i32);
840}
841
842EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
843 EVT VT) const {
844 if (!VT.isVector())
845 return MVT::i32;
846 return VT.changeVectorElementTypeToInteger();
847}
848
849static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
850 const APInt &Demanded,
851 TargetLowering::TargetLoweringOpt &TLO,
852 unsigned NewOpc) {
853 uint64_t OldImm = Imm, NewImm, Enc;
854 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
855
856 // Return if the immediate is already all zeros, all ones, a bimm32 or a
857 // bimm64.
858 if (Imm == 0 || Imm == Mask ||
859 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
860 return false;
861
862 unsigned EltSize = Size;
863 uint64_t DemandedBits = Demanded.getZExtValue();
864
865 // Clear bits that are not demanded.
866 Imm &= DemandedBits;
867
868 while (true) {
869 // The goal here is to set the non-demanded bits in a way that minimizes
870 // the number of switching between 0 and 1. In order to achieve this goal,
871 // we set the non-demanded bits to the value of the preceding demanded bits.
872 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
873 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
874 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
875 // The final result is 0b11000011.
876 uint64_t NonDemandedBits = ~DemandedBits;
877 uint64_t InvertedImm = ~Imm & DemandedBits;
878 uint64_t RotatedImm =
879 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
880 NonDemandedBits;
881 uint64_t Sum = RotatedImm + NonDemandedBits;
882 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
883 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
884 NewImm = (Imm | Ones) & Mask;
885
886 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
887 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
888 // we halve the element size and continue the search.
889 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
890 break;
891
892 // We cannot shrink the element size any further if it is 2-bits.
893 if (EltSize == 2)
894 return false;
895
896 EltSize /= 2;
897 Mask >>= EltSize;
898 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
899
900 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
901 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
902 return false;
903
904 // Merge the upper and lower halves of Imm and DemandedBits.
905 Imm |= Hi;
906 DemandedBits |= DemandedBitsHi;
907 }
908
909 ++NumOptimizedImms;
910
911 // Replicate the element across the register width.
912 while (EltSize < Size) {
913 NewImm |= NewImm << EltSize;
914 EltSize *= 2;
915 }
916
917 (void)OldImm;
918 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
919 "demanded bits should never be altered");
920 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
922 // Create the new constant immediate node.
923 EVT VT = Op.getValueType();
924 SDLoc DL(Op);
925 SDValue New;
926
927 // If the new constant immediate is all-zeros or all-ones, let the target
928 // independent DAG combine optimize this node.
929 if (NewImm == 0 || NewImm == OrigMask) {
930 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
931 TLO.DAG.getConstant(NewImm, DL, VT));
932 // Otherwise, create a machine node so that target independent DAG combine
933 // doesn't undo this optimization.
934 } else {
935 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
936 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
937 New = SDValue(
938 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
939 }
940
941 return TLO.CombineTo(Op, New);
942}
943
944bool AArch64TargetLowering::targetShrinkDemandedConstant(
945 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
946 // Delay this optimization to as late as possible.
947 if (!TLO.LegalOps)
948 return false;
949
950 if (!EnableOptimizeLogicalImm)
951 return false;
952
953 EVT VT = Op.getValueType();
954 if (VT.isVector())
955 return false;
956
957 unsigned Size = VT.getSizeInBits();
958 assert((Size == 32 || Size == 64) &&
959 "i32 or i64 is expected after legalization.");
960
961 // Exit early if we demand all bits.
962 if (Demanded.countPopulation() == Size)
963 return false;
964
965 unsigned NewOpc;
966 switch (Op.getOpcode()) {
967 default:
968 return false;
969 case ISD::AND:
970 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
971 break;
972 case ISD::OR:
973 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
974 break;
975 case ISD::XOR:
976 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
977 break;
978 }
979 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
980 if (!C)
981 return false;
982 uint64_t Imm = C->getZExtValue();
983 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
984}
985
986/// computeKnownBitsForTargetNode - Determine which of the bits specified in
987/// Mask are known to be either zero or one and return them Known.
988void AArch64TargetLowering::computeKnownBitsForTargetNode(
989 const SDValue Op, KnownBits &Known,
990 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
991 switch (Op.getOpcode()) {
992 default:
993 break;
994 case AArch64ISD::CSEL: {
995 KnownBits Known2;
996 DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
997 DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
998 Known.Zero &= Known2.Zero;
999 Known.One &= Known2.One;
1000 break;
1001 }
1002 case ISD::INTRINSIC_W_CHAIN: {
1003 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1004 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1005 switch (IntID) {
1006 default: return;
1007 case Intrinsic::aarch64_ldaxr:
1008 case Intrinsic::aarch64_ldxr: {
1009 unsigned BitWidth = Known.getBitWidth();
1010 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1011 unsigned MemBits = VT.getScalarSizeInBits();
1012 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1013 return;
1014 }
1015 }
1016 break;
1017 }
1018 case ISD::INTRINSIC_WO_CHAIN:
1019 case ISD::INTRINSIC_VOID: {
1020 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1021 switch (IntNo) {
1022 default:
1023 break;
1024 case Intrinsic::aarch64_neon_umaxv:
1025 case Intrinsic::aarch64_neon_uminv: {
1026 // Figure out the datatype of the vector operand. The UMINV instruction
1027 // will zero extend the result, so we can mark as known zero all the
1028 // bits larger than the element datatype. 32-bit or larget doesn't need
1029 // this as those are legal types and will be handled by isel directly.
1030 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1031 unsigned BitWidth = Known.getBitWidth();
1032 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1033 assert(BitWidth >= 8 && "Unexpected width!");
1034 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1035 Known.Zero |= Mask;
1036 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1037 assert(BitWidth >= 16 && "Unexpected width!");
1038 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1039 Known.Zero |= Mask;
1040 }
1041 break;
1042 } break;
1043 }
1044 }
1045 }
1046}
1047
1048MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1049 EVT) const {
1050 return MVT::i64;
1051}
1052
1053bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1054 unsigned AddrSpace,
1055 unsigned Align,
1056 bool *Fast) const {
1057 if (Subtarget->requiresStrictAlign())
1058 return false;
1059
1060 if (Fast) {
1061 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1062 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1063 // See comments in performSTORECombine() for more details about
1064 // these conditions.
1065
1066 // Code that uses clang vector extensions can mark that it
1067 // wants unaligned accesses to be treated as fast by
1068 // underspecifying alignment to be 1 or 2.
1069 Align <= 2 ||
1070
1071 // Disregard v2i64. Memcpy lowering produces those and splitting
1072 // them regresses performance on micro-benchmarks and olden/bh.
1073 VT == MVT::v2i64;
1074 }
1075 return true;
1076}
1077
1078FastISel *
1079AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1080 const TargetLibraryInfo *libInfo) const {
1081 return AArch64::createFastISel(funcInfo, libInfo);
1082}
1083
1084const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1085 switch ((AArch64ISD::NodeType)Opcode) {
1086 case AArch64ISD::FIRST_NUMBER: break;
1087 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1088 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1089 case AArch64ISD::ADR: return "AArch64ISD::ADR";
1090 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1091 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1092 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1093 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1094 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1095 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1096 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1097 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1098 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1099 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1100 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1101 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1102 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1103 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1104 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1105 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1106 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1107 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1108 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1109 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1110 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1111 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1112 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1113 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1114 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1115 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1116 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1117 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1118 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1119 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1120 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1121 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1122 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1123 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1124 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1125 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1126 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1127 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1128 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1129 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1130 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1131 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1132 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1133 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1134 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1135 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1136 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1137 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1138 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1139 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1140 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1141 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1142 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1143 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1144 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1145 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1146 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1147 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1148 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1149 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1150 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1151 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1152 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1153 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1154 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1155 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1156 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1157 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1158 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1159 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1160 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1161 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1162 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1163 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1164 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1165 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1166 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1167 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1168 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1169 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1170 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1171 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1172 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1173 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1174 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1175 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1176 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1177 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1178 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1179 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1180 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1181 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1182 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1183 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1184 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1185 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1186 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1187 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1188 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1189 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1190 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1191 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1192 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1193 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1194 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1195 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1196 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1197 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1198 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1199 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1200 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1201 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1202 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1203 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1204 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1205 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1206 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1207 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1208 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1209 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1210 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1211 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1212 }
1213 return nullptr;
1214}
1215
1216MachineBasicBlock *
1217AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1218 MachineBasicBlock *MBB) const {
1219 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1220 // phi node:
1221
1222 // OrigBB:
1223 // [... previous instrs leading to comparison ...]
1224 // b.ne TrueBB
1225 // b EndBB
1226 // TrueBB:
1227 // ; Fallthrough
1228 // EndBB:
1229 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1230
1231 MachineFunction *MF = MBB->getParent();
1232 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1233 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1234 DebugLoc DL = MI.getDebugLoc();
1235 MachineFunction::iterator It = ++MBB->getIterator();
1236
1237 unsigned DestReg = MI.getOperand(0).getReg();
1238 unsigned IfTrueReg = MI.getOperand(1).getReg();
1239 unsigned IfFalseReg = MI.getOperand(2).getReg();
1240 unsigned CondCode = MI.getOperand(3).getImm();
1241 bool NZCVKilled = MI.getOperand(4).isKill();
1242
1243 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1244 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1245 MF->insert(It, TrueBB);
1246 MF->insert(It, EndBB);
1247
1248 // Transfer rest of current basic-block to EndBB
1249 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1250 MBB->end());
1251 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1252
1253 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1254 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1255 MBB->addSuccessor(TrueBB);
1256 MBB->addSuccessor(EndBB);
1257
1258 // TrueBB falls through to the end.
1259 TrueBB->addSuccessor(EndBB);
1260
1261 if (!NZCVKilled) {
1262 TrueBB->addLiveIn(AArch64::NZCV);
1263 EndBB->addLiveIn(AArch64::NZCV);
1264 }
1265
1266 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1267 .addReg(IfTrueReg)
1268 .addMBB(TrueBB)
1269 .addReg(IfFalseReg)
1270 .addMBB(MBB);
1271
1272 MI.eraseFromParent();
1273 return EndBB;
1274}
1275
1276MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1277 MachineInstr &MI, MachineBasicBlock *BB) const {
1278 switch (MI.getOpcode()) {
1279 default:
1280#ifndef NDEBUG
1281 MI.dump();
1282#endif
1283    llvm_unreachable("Unexpected instruction for custom inserter!");
1284
1285 case AArch64::F128CSEL:
1286 return EmitF128CSEL(MI, BB);
1287
1288 case TargetOpcode::STACKMAP:
1289 case TargetOpcode::PATCHPOINT:
1290 return emitPatchPoint(MI, BB);
1291 }
1292}
1293
1294//===----------------------------------------------------------------------===//
1295// AArch64 Lowering private implementation.
1296//===----------------------------------------------------------------------===//
1297
1298//===----------------------------------------------------------------------===//
1299// Lowering Code
1300//===----------------------------------------------------------------------===//
1301
1302/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1303/// CC
1304static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1305 switch (CC) {
1306 default:
1307    llvm_unreachable("Unknown condition code!");
1308 case ISD::SETNE:
1309 return AArch64CC::NE;
1310 case ISD::SETEQ:
1311 return AArch64CC::EQ;
1312 case ISD::SETGT:
1313 return AArch64CC::GT;
1314 case ISD::SETGE:
1315 return AArch64CC::GE;
1316 case ISD::SETLT:
1317 return AArch64CC::LT;
1318 case ISD::SETLE:
1319 return AArch64CC::LE;
1320 case ISD::SETUGT:
1321 return AArch64CC::HI;
1322 case ISD::SETUGE:
1323 return AArch64CC::HS;
1324 case ISD::SETULT:
1325 return AArch64CC::LO;
1326 case ISD::SETULE:
1327 return AArch64CC::LS;
1328 }
1329}
1330
1331/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1332static void changeFPCCToAArch64CC(ISD::CondCode CC,
1333 AArch64CC::CondCode &CondCode,
1334 AArch64CC::CondCode &CondCode2) {
1335 CondCode2 = AArch64CC::AL;
1336 switch (CC) {
1337 default:
1338    llvm_unreachable("Unknown FP condition!");
1339 case ISD::SETEQ:
1340 case ISD::SETOEQ:
1341 CondCode = AArch64CC::EQ;
1342 break;
1343 case ISD::SETGT:
1344 case ISD::SETOGT:
1345 CondCode = AArch64CC::GT;
1346 break;
1347 case ISD::SETGE:
1348 case ISD::SETOGE:
1349 CondCode = AArch64CC::GE;
1350 break;
1351 case ISD::SETOLT:
1352 CondCode = AArch64CC::MI;
1353 break;
1354 case ISD::SETOLE:
1355 CondCode = AArch64CC::LS;
1356 break;
1357 case ISD::SETONE:
1358 CondCode = AArch64CC::MI;
1359 CondCode2 = AArch64CC::GT;
1360 break;
1361 case ISD::SETO:
1362 CondCode = AArch64CC::VC;
1363 break;
1364 case ISD::SETUO:
1365 CondCode = AArch64CC::VS;
1366 break;
1367 case ISD::SETUEQ:
1368 CondCode = AArch64CC::EQ;
1369 CondCode2 = AArch64CC::VS;
1370 break;
1371 case ISD::SETUGT:
1372 CondCode = AArch64CC::HI;
1373 break;
1374 case ISD::SETUGE:
1375 CondCode = AArch64CC::PL;
1376 break;
1377 case ISD::SETLT:
1378 case ISD::SETULT:
1379 CondCode = AArch64CC::LT;
1380 break;
1381 case ISD::SETLE:
1382 case ISD::SETULE:
1383 CondCode = AArch64CC::LE;
1384 break;
1385 case ISD::SETNE:
1386 case ISD::SETUNE:
1387 CondCode = AArch64CC::NE;
1388 break;
1389 }
1390}
1391
1392/// Convert a DAG fp condition code to an AArch64 CC.
1393/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1394/// should be AND'ed instead of OR'ed.
1395static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1396 AArch64CC::CondCode &CondCode,
1397 AArch64CC::CondCode &CondCode2) {
1398 CondCode2 = AArch64CC::AL;
1399 switch (CC) {
1400 default:
1401 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1402    assert(CondCode2 == AArch64CC::AL);
1403 break;
1404 case ISD::SETONE:
1405 // (a one b)
1406 // == ((a olt b) || (a ogt b))
1407 // == ((a ord b) && (a une b))
1408 CondCode = AArch64CC::VC;
1409 CondCode2 = AArch64CC::NE;
1410 break;
1411 case ISD::SETUEQ:
1412 // (a ueq b)
1413 // == ((a uno b) || (a oeq b))
1414 // == ((a ule b) && (a uge b))
1415 CondCode = AArch64CC::PL;
1416 CondCode2 = AArch64CC::LE;
1417 break;
1418 }
1419}
1420
1421/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1422/// CC usable with the vector instructions. Fewer operations are available
1423/// without a real NZCV register, so we have to use less efficient combinations
1424/// to get the same effect.
1425static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1426 AArch64CC::CondCode &CondCode,
1427 AArch64CC::CondCode &CondCode2,
1428 bool &Invert) {
1429 Invert = false;
1430 switch (CC) {
1431 default:
1432 // Mostly the scalar mappings work fine.
1433 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1434 break;
1435 case ISD::SETUO:
1436 Invert = true;
1437    LLVM_FALLTHROUGH;
1438 case ISD::SETO:
1439 CondCode = AArch64CC::MI;
1440 CondCode2 = AArch64CC::GE;
1441 break;
1442 case ISD::SETUEQ:
1443 case ISD::SETULT:
1444 case ISD::SETULE:
1445 case ISD::SETUGT:
1446 case ISD::SETUGE:
1447 // All of the compare-mask comparisons are ordered, but we can switch
1448 // between the two by a double inversion. E.g. ULE == !OGT.
1449 Invert = true;
1450 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1451 break;
1452 }
1453}
1454
1455static bool isLegalArithImmed(uint64_t C) {
1456 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1457 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1458  LLVM_DEBUG(dbgs() << "Is imm " << C
1459                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1460 return IsLegal;
1461}
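// Editorial sketch (not part of the original source): the rule above accepts a
// 12-bit unsigned immediate, or a 12-bit immediate shifted left by 12, matching
// the arithmetic-immediate encodings of ADD/SUB/CMP on AArch64. A few assumed
// sample values:
//   isLegalArithImmed(0xFFF);      // true  - fits in the low 12 bits
//   isLegalArithImmed(0x1000);     // true  - 0x1 shifted left by 12
//   isLegalArithImmed(0xFFF000);   // true  - 0xFFF shifted left by 12
//   isLegalArithImmed(0x1001);     // false - not representable in either form
//   isLegalArithImmed(0x1000000);  // false - exceeds the shifted 12-bit range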
1462
1463// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
1464// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
1465// can be set differently by this operation. It comes down to whether
1466// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1467// everything is fine. If not then the optimization is wrong. Thus general
1468// comparisons are only valid if op2 != 0.
1469//
1470// So, finally, the only LLVM-native comparisons that don't mention C and V
1471// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1472// the absence of information about op2.
1473static bool isCMN(SDValue Op, ISD::CondCode CC) {
1474 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1475 (CC == ISD::SETEQ || CC == ISD::SETNE);
1476}
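// Illustrative use of the predicate above (an editorial sketch, not original
// source): for IR along the lines of
//   %neg = sub i64 0, %b
//   %cmp = icmp eq i64 %a, %neg
// isCMN(%neg, SETEQ) returns true, so the comparison can be emitted as
// "cmn x0, x1" (an ADDS whose flags are read) instead of materialising the
// negation and using CMP. For ordered conditions such as SETLT it returns
// false, because the C and V flags of the rewritten operation can differ, as
// the comment above explains.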
1477
1478static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1479 const SDLoc &dl, SelectionDAG &DAG) {
1480 EVT VT = LHS.getValueType();
1481 const bool FullFP16 =
1482 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1483
1484 if (VT.isFloatingPoint()) {
1485    assert(VT != MVT::f128);
1486 if (VT == MVT::f16 && !FullFP16) {
1487 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1488 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1489 VT = MVT::f32;
1490 }
1491 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1492 }
1493
1494 // The CMP instruction is just an alias for SUBS, and representing it as
1495 // SUBS means that it's possible to get CSE with subtract operations.
1496 // A later phase can perform the optimization of setting the destination
1497 // register to WZR/XZR if it ends up being unused.
1498 unsigned Opcode = AArch64ISD::SUBS;
1499
1500 if (isCMN(RHS, CC)) {
1501    // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
1502 Opcode = AArch64ISD::ADDS;
1503 RHS = RHS.getOperand(1);
1504 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1505 !isUnsignedIntSetCC(CC)) {
1506 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1507 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1508 // of the signed comparisons.
1509 Opcode = AArch64ISD::ANDS;
1510 RHS = LHS.getOperand(1);
1511 LHS = LHS.getOperand(0);
1512 }
1513
1514 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1515 .getValue(1);
1516}
1517
1518/// \defgroup AArch64CCMP CMP;CCMP matching
1519///
1520/// These functions deal with the formation of CMP;CCMP;... sequences.
1521/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1522/// a comparison. They set the NZCV flags to a predefined value if their
1523/// predicate is false. This allows to express arbitrary conjunctions, for
1524/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1525/// expressed as:
1526/// cmp A
1527/// ccmp B, inv(CB), CA
1528/// check for CB flags
1529///
1530/// In general we can create code for arbitrary "... (and (and A B) C)"
1531/// sequences. We can also implement some "or" expressions, because "(or A B)"
1532/// is equivalent to "not (and (not A) (not B))" and we can implement some
1533/// negation operations:
1534/// We can negate the results of a single comparison by inverting the flags
1535/// used when the predicate fails and inverting the flags tested in the next
1536/// instruction; We can also negate the results of the whole previous
1537/// conditional compare sequence by inverting the flags tested in the next
1538/// instruction. However there is no way to negate the result of a partial
1539/// sequence.
1540///
1541/// Therefore on encountering an "or" expression we can negate the subtree on
1542/// one side and have to be able to push the negate to the leaves of the subtree
1543/// on the other side (see also the comments in code). As complete example:
1544/// "or (or (setCA (cmp A)) (setCB (cmp B)))
1545/// (and (setCC (cmp C)) (setCD (cmp D)))"
1546/// is transformed to
1547/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1548/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1549/// and implemented as:
1550/// cmp C
1551/// ccmp D, inv(CD), CC
1552/// ccmp A, CA, inv(CD)
1553/// ccmp B, CB, inv(CA)
1554/// check for CB flags
1555/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1556/// by conditional compare sequences.
1557/// @{
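// Editorial example of the scheme documented above (the registers, immediates
// and NZCV value are assumptions made for illustration; they are not taken
// from this file): lowering "if (x == 0 && y > 7)" could produce
//   cmp  w0, #0           // flags for "x == 0"            -> test EQ
//   ccmp w1, #7, #4, eq   // if EQ holds, compare y with 7;
//                         // otherwise force NZCV = 0b0100 so that GT fails
//   b.gt .Ltaken          // one check of the final flags decides the whole AND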
1558
1559/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1560static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1561 ISD::CondCode CC, SDValue CCOp,
1562 AArch64CC::CondCode Predicate,
1563 AArch64CC::CondCode OutCC,
1564 const SDLoc &DL, SelectionDAG &DAG) {
1565 unsigned Opcode = 0;
1566 const bool FullFP16 =
1567 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1568
1569 if (LHS.getValueType().isFloatingPoint()) {
1570    assert(LHS.getValueType() != MVT::f128);
1571 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1572 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1573 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1574 }
1575 Opcode = AArch64ISD::FCCMP;
1576 } else if (RHS.getOpcode() == ISD::SUB) {
1577 SDValue SubOp0 = RHS.getOperand(0);
1578 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1579 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1580 Opcode = AArch64ISD::CCMN;
1581 RHS = RHS.getOperand(1);
1582 }
1583 }
1584 if (Opcode == 0)
1585 Opcode = AArch64ISD::CCMP;
1586
1587 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1588 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1589 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1590 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1591 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1592}
1593
1594/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1595/// CanNegate is set to true if we can push a negate operation through
1596/// the tree in a way that we are left with AND operations and negate operations
1597/// at the leaves only, i.e. "not (or (or x y) z)" can be changed to
1598/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1599/// brought into such a form.
1600static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
1601 unsigned Depth = 0) {
1602 if (!Val.hasOneUse())
1603 return false;
1604 unsigned Opcode = Val->getOpcode();
1605 if (Opcode == ISD::SETCC) {
1606 if (Val->getOperand(0).getValueType() == MVT::f128)
1607 return false;
1608 CanNegate = true;
1609 return true;
1610 }
1611 // Protect against exponential runtime and stack overflow.
1612 if (Depth > 6)
1613 return false;
1614 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1615 SDValue O0 = Val->getOperand(0);
1616 SDValue O1 = Val->getOperand(1);
1617 bool CanNegateL;
1618 if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1))
1619 return false;
1620 bool CanNegateR;
1621 if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1))
1622 return false;
1623
1624 if (Opcode == ISD::OR) {
1625 // For an OR expression we need to be able to negate at least one side or
1626 // we cannot do the transformation at all.
1627 if (!CanNegateL && !CanNegateR)
1628 return false;
1629 // We can however change a (not (or x y)) to (and (not x) (not y)) if we
1630 // can negate the x and y subtrees.
1631 CanNegate = CanNegateL && CanNegateR;
1632 } else {
1633      // If the operands are OR expressions then we finally need to negate their
1634      // outputs; we can only do that for the operand that is emitted last, by
1635      // negating OutCC, not for both operands.
1636 bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1637 bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1638 if (NeedsNegOutL && NeedsNegOutR)
1639 return false;
1640 // We cannot negate an AND operation (it would become an OR),
1641 CanNegate = false;
1642 }
1643 return true;
1644 }
1645 return false;
1646}
1647
1648/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1649/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1650/// Tries to transform the given i1 producing node @p Val to a series of compare
1651/// and conditional compare operations. @returns an NZCV flags producing node
1652/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
1653/// the transformation was not possible.
1654/// On recursive invocations @p PushNegate may be set to true to have negation
1655/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
1656/// for the comparisons in the current subtree; @p Depth limits the search
1657/// depth to avoid stack overflow.
1658static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val,
1659 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1660 AArch64CC::CondCode Predicate) {
1661 // We're at a tree leaf, produce a conditional comparison operation.
1662 unsigned Opcode = Val->getOpcode();
1663 if (Opcode == ISD::SETCC) {
1664 SDValue LHS = Val->getOperand(0);
1665 SDValue RHS = Val->getOperand(1);
1666 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1667 bool isInteger = LHS.getValueType().isInteger();
1668 if (Negate)
1669 CC = getSetCCInverse(CC, isInteger);
1670 SDLoc DL(Val);
1671 // Determine OutCC and handle FP special case.
1672 if (isInteger) {
1673 OutCC = changeIntCCToAArch64CC(CC);
1674 } else {
1675      assert(LHS.getValueType().isFloatingPoint());
1676 AArch64CC::CondCode ExtraCC;
1677 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1678 // Some floating point conditions can't be tested with a single condition
1679 // code. Construct an additional comparison in this case.
1680 if (ExtraCC != AArch64CC::AL) {
1681 SDValue ExtraCmp;
1682 if (!CCOp.getNode())
1683 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1684 else
1685 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1686 ExtraCC, DL, DAG);
1687 CCOp = ExtraCmp;
1688 Predicate = ExtraCC;
1689 }
1690 }
1691
1692 // Produce a normal comparison if we are first in the chain
1693 if (!CCOp)
1694 return emitComparison(LHS, RHS, CC, DL, DAG);
1695 // Otherwise produce a ccmp.
1696 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1697 DAG);
1698 }
1699  assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1700         "Valid conjunction/disjunction tree");
1701
1702 // Check if both sides can be transformed.
1703 SDValue LHS = Val->getOperand(0);
1704 SDValue RHS = Val->getOperand(1);
1705
1706 // In case of an OR we need to negate our operands and the result.
1707 // (A v B) <=> not(not(A) ^ not(B))
1708 bool NegateOpsAndResult = Opcode == ISD::OR;
1709 // We can negate the results of all previous operations by inverting the
1710 // predicate flags giving us a free negation for one side. The other side
1711 // must be negatable by itself.
1712 if (NegateOpsAndResult) {
1713 // See which side we can negate.
1714 bool CanNegateL;
1715 bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
1716    assert(isValidL && "Valid conjunction/disjunction tree");
1717 (void)isValidL;
1718
1719#ifndef NDEBUG
1720 bool CanNegateR;
1721 bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
1722    assert(isValidR && "Valid conjunction/disjunction tree");
1723    assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1724#endif
1725
1726 // Order the side which we cannot negate to RHS so we can emit it first.
1727 if (!CanNegateL)
1728 std::swap(LHS, RHS);
1729 } else {
1730 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1731    assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1732           "Valid conjunction/disjunction tree");
1733 // Order the side where we need to negate the output flags to RHS so it
1734 // gets emitted first.
1735 if (NeedsNegOutL)
1736 std::swap(LHS, RHS);
1737 }
1738
1739 // Emit RHS. If we want to negate the tree we only need to push a negate
1740 // through if we are already in a PushNegate case, otherwise we can negate
1741 // the "flags to test" afterwards.
1742 AArch64CC::CondCode RHSCC;
1743 SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
1744 CCOp, Predicate);
1745 if (NegateOpsAndResult && !Negate)
1746 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1747 // Emit LHS. We may need to negate it.
1748 SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
1749 NegateOpsAndResult, CmpR,
1750 RHSCC);
1751  // If we transformed an OR to an AND then we have to negate the result
1752 // (or absorb the Negate parameter).
1753 if (NegateOpsAndResult && !Negate)
1754 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1755 return CmpL;
1756}
1757
1758/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1759/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1760/// \see emitConjunctionDisjunctionTreeRec().
1761static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
1762 AArch64CC::CondCode &OutCC) {
1763 bool CanNegate;
1764 if (!isConjunctionDisjunctionTree(Val, CanNegate))
1765 return SDValue();
1766
1767 return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(),
1768 AArch64CC::AL);
1769}
1770
1771/// @}
1772
1773/// Returns how profitable it is to fold a comparison's operand's shift and/or
1774/// extension operations.
1775static unsigned getCmpOperandFoldingProfit(SDValue Op) {
1776 auto isSupportedExtend = [&](SDValue V) {
1777 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
1778 return true;
1779
1780 if (V.getOpcode() == ISD::AND)
1781 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
1782 uint64_t Mask = MaskCst->getZExtValue();
1783 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
1784 }
1785
1786 return false;
1787 };
1788
1789 if (!Op.hasOneUse())
1790 return 0;
1791
1792 if (isSupportedExtend(Op))
1793 return 1;
1794
1795 unsigned Opc = Op.getOpcode();
1796 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
1797 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
1798 uint64_t Shift = ShiftCst->getZExtValue();
1799 if (isSupportedExtend(Op.getOperand(0)))
1800 return (Shift <= 4) ? 2 : 1;
1801 EVT VT = Op.getValueType();
1802 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
1803 return 1;
1804 }
1805
1806 return 0;
1807}
1808
1809static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1810 SDValue &AArch64cc, SelectionDAG &DAG,
1811 const SDLoc &dl) {
1812 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1813 EVT VT = RHS.getValueType();
1814 uint64_t C = RHSC->getZExtValue();
1815 if (!isLegalArithImmed(C)) {
1816 // Constant does not fit, try adjusting it by one?
1817 switch (CC) {
1818 default:
1819 break;
1820 case ISD::SETLT:
1821 case ISD::SETGE:
1822 if ((VT == MVT::i32 && C != 0x80000000 &&
1823 isLegalArithImmed((uint32_t)(C - 1))) ||
1824 (VT == MVT::i64 && C != 0x80000000ULL &&
1825 isLegalArithImmed(C - 1ULL))) {
1826 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1827 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1828 RHS = DAG.getConstant(C, dl, VT);
1829 }
1830 break;
1831 case ISD::SETULT:
1832 case ISD::SETUGE:
1833 if ((VT == MVT::i32 && C != 0 &&
1834 isLegalArithImmed((uint32_t)(C - 1))) ||
1835 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1836 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1837 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1838 RHS = DAG.getConstant(C, dl, VT);
1839 }
1840 break;
1841 case ISD::SETLE:
1842 case ISD::SETGT:
1843        if ((VT == MVT::i32 && C != INT32_MAX &&
1844 isLegalArithImmed((uint32_t)(C + 1))) ||
1845            (VT == MVT::i64 && C != INT64_MAX &&
1846 isLegalArithImmed(C + 1ULL))) {
1847 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1848 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1849 RHS = DAG.getConstant(C, dl, VT);
1850 }
1851 break;
1852 case ISD::SETULE:
1853 case ISD::SETUGT:
1854        if ((VT == MVT::i32 && C != UINT32_MAX &&
1855 isLegalArithImmed((uint32_t)(C + 1))) ||
1856            (VT == MVT::i64 && C != UINT64_MAX &&
1857 isLegalArithImmed(C + 1ULL))) {
1858 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1859 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1860 RHS = DAG.getConstant(C, dl, VT);
1861 }
1862 break;
1863 }
1864 }
1865 }
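  // Worked example for the adjustment above (editorial note, not original
  // source): 0x1001 is not a legal arithmetic immediate, but 0x1000 is, so
  // "x slt 0x1001" becomes "x sle 0x1000" and the comparison can still be
  // selected as a single CMP with an encodable immediate.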
1866
1867 // Comparisons are canonicalized so that the RHS operand is simpler than the
1868 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
1869 // can fold some shift+extend operations on the RHS operand, so swap the
1870 // operands if that can be done.
1871 //
1872 // For example:
1873 // lsl w13, w11, #1
1874 // cmp w13, w12
1875 // can be turned into:
1876 // cmp w12, w11, lsl #1
1877 if (!isa<ConstantSDNode>(RHS) ||
1878 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
1879 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
1880
1881 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
1882 std::swap(LHS, RHS);
1883 CC = ISD::getSetCCSwappedOperands(CC);
1884 }
1885 }
1886
1887 SDValue Cmp;
1888 AArch64CC::CondCode AArch64CC;
1889 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1890 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1891
1892 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1893 // For the i8 operand, the largest immediate is 255, so this can be easily
1894 // encoded in the compare instruction. For the i16 operand, however, the
1895 // largest immediate cannot be encoded in the compare.
1896 // Therefore, use a sign extending load and cmn to avoid materializing the
1897 // -1 constant. For example,
1898 // movz w1, #65535
1899 // ldrh w0, [x0, #0]
1900 // cmp w0, w1
1901 // >
1902 // ldrsh w0, [x0, #0]
1903 // cmn w0, #1
1904    // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1905 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1906 // ensure both the LHS and RHS are truly zero extended and to make sure the
1907 // transformation is profitable.
1908 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1909 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1910 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1911 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1912 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1913 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1914 SDValue SExt =
1915 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1916 DAG.getValueType(MVT::i16));
1917 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1918 RHS.getValueType()),
1919 CC, dl, DAG);
1920 AArch64CC = changeIntCCToAArch64CC(CC);
1921 }
1922 }
1923
1924 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1925 if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1926 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1927 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1928 }
1929 }
1930 }
1931
1932 if (!Cmp) {
1933 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1934 AArch64CC = changeIntCCToAArch64CC(CC);
1935 }
1936 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1937 return Cmp;
1938}
1939
1940static std::pair<SDValue, SDValue>
1941getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1942  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1943         "Unsupported value type");
1944 SDValue Value, Overflow;
1945 SDLoc DL(Op);
1946 SDValue LHS = Op.getOperand(0);
1947 SDValue RHS = Op.getOperand(1);
1948 unsigned Opc = 0;
1949 switch (Op.getOpcode()) {
1950 default:
1951    llvm_unreachable("Unknown overflow instruction!");
1952 case ISD::SADDO:
1953 Opc = AArch64ISD::ADDS;
1954 CC = AArch64CC::VS;
1955 break;
1956 case ISD::UADDO:
1957 Opc = AArch64ISD::ADDS;
1958 CC = AArch64CC::HS;
1959 break;
1960 case ISD::SSUBO:
1961 Opc = AArch64ISD::SUBS;
1962 CC = AArch64CC::VS;
1963 break;
1964 case ISD::USUBO:
1965 Opc = AArch64ISD::SUBS;
1966 CC = AArch64CC::LO;
1967 break;
1968  // Multiply needs a little bit of extra work.
1969 case ISD::SMULO:
1970 case ISD::UMULO: {
1971 CC = AArch64CC::NE;
1972 bool IsSigned = Op.getOpcode() == ISD::SMULO;
1973 if (Op.getValueType() == MVT::i32) {
1974 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1975 // For a 32 bit multiply with overflow check we want the instruction
1976 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1977 // need to generate the following pattern:
1978 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1979 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1980 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1981 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1982 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1983 DAG.getConstant(0, DL, MVT::i64));
1984 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1985 // operation. We need to clear out the upper 32 bits, because we used a
1986 // widening multiply that wrote all 64 bits. In the end this should be a
1987 // noop.
1988 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1989 if (IsSigned) {
1990 // The signed overflow check requires more than just a simple check for
1991 // any bit set in the upper 32 bits of the result. These bits could be
1992 // just the sign bits of a negative number. To perform the overflow
1993        // check we arithmetically shift the lower 32 bits of the result right by
1994        // 31 and then compare them with the upper 32 bits.
1995 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1996 DAG.getConstant(32, DL, MVT::i64));
1997 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1998 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1999 DAG.getConstant(31, DL, MVT::i64));
2000 // It is important that LowerBits is last, otherwise the arithmetic
2001 // shift will not be folded into the compare (SUBS).
2002 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2003 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2004 .getValue(1);
2005 } else {
2006 // The overflow check for unsigned multiply is easy. We only need to
2007 // check if any of the upper 32 bits are set. This can be done with a
2008 // CMP (shifted register). For that we need to generate the following
2009 // pattern:
2010 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2011 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2012 DAG.getConstant(32, DL, MVT::i64));
2013 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2014 Overflow =
2015 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2016 DAG.getConstant(0, DL, MVT::i64),
2017 UpperBits).getValue(1);
2018 }
2019 break;
2020 }
2021    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2022 // For the 64 bit multiply
2023 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2024 if (IsSigned) {
2025 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2026 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2027 DAG.getConstant(63, DL, MVT::i64));
2028 // It is important that LowerBits is last, otherwise the arithmetic
2029 // shift will not be folded into the compare (SUBS).
2030 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2031 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2032 .getValue(1);
2033 } else {
2034 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2035 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2036 Overflow =
2037 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2038 DAG.getConstant(0, DL, MVT::i64),
2039 UpperBits).getValue(1);
2040 }
2041 break;
2042 }
2043 } // switch (...)
2044
2045 if (Opc) {
2046 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2047
2048 // Emit the AArch64 operation with overflow check.
2049 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2050 Overflow = Value.getValue(1);
2051 }
2052 return std::make_pair(Value, Overflow);
2053}
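// Editorial summary of the overflow tests constructed above (a sketch, not
// part of the original source):
//   i32 smulo: widen and multiply as i64; overflow <=> (upper 32 bits of the
//              product) != (lower 32 bits arithmetically shifted right by 31)
//   i32 umulo: overflow <=> (upper 32 bits of the 64-bit product) != 0
//   i64 smulo: overflow <=> (MULHS a, b) != ((MUL a, b) >>s 63)
//   i64 umulo: overflow <=> (MULHU a, b) != 0
// In each case the final inequality is emitted as a SUBS so that only the
// NZCV flags are consumed.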
2054
2055SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2056 RTLIB::Libcall Call) const {
2057 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2058 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
2059}
2060
2061// Returns true if the given Op is the overflow flag result of an overflow
2062// intrinsic operation.
2063static bool isOverflowIntrOpRes(SDValue Op) {
2064 unsigned Opc = Op.getOpcode();
2065 return (Op.getResNo() == 1 &&
2066 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2067 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2068}
2069
2070static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2071 SDValue Sel = Op.getOperand(0);
2072 SDValue Other = Op.getOperand(1);
2073 SDLoc dl(Sel);
2074
2075 // If the operand is an overflow checking operation, invert the condition
2076 // code and kill the Not operation. I.e., transform:
2077 // (xor (overflow_op_bool, 1))
2078 // -->
2079 // (csel 1, 0, invert(cc), overflow_op_bool)
2080 // ... which later gets transformed to just a cset instruction with an
2081 // inverted condition code, rather than a cset + eor sequence.
2082 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
2083 // Only lower legal XALUO ops.
2084 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2085 return SDValue();
2086
2087 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2088 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2089 AArch64CC::CondCode CC;
2090 SDValue Value, Overflow;
2091 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2092 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2093 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2094 CCVal, Overflow);
2095 }
2096 // If neither operand is a SELECT_CC, give up.
2097 if (Sel.getOpcode() != ISD::SELECT_CC)
2098 std::swap(Sel, Other);
2099 if (Sel.getOpcode() != ISD::SELECT_CC)
2100 return Op;
2101
2102 // The folding we want to perform is:
2103 // (xor x, (select_cc a, b, cc, 0, -1) )
2104 // -->
2105 // (csel x, (xor x, -1), cc ...)
2106 //
2107 // The latter will get matched to a CSINV instruction.
2108
2109 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2110 SDValue LHS = Sel.getOperand(0);
2111 SDValue RHS = Sel.getOperand(1);
2112 SDValue TVal = Sel.getOperand(2);
2113 SDValue FVal = Sel.getOperand(3);
2114
2115 // FIXME: This could be generalized to non-integer comparisons.
2116 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2117 return Op;
2118
2119 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2120 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2121
2122 // The values aren't constants, this isn't the pattern we're looking for.
2123 if (!CFVal || !CTVal)
2124 return Op;
2125
2126 // We can commute the SELECT_CC by inverting the condition. This
2127 // might be needed to make this fit into a CSINV pattern.
2128 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2129 std::swap(TVal, FVal);
2130 std::swap(CTVal, CFVal);
2131 CC = ISD::getSetCCInverse(CC, true);
2132 }
2133
2134 // If the constants line up, perform the transform!
2135 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2136 SDValue CCVal;
2137 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2138
2139 FVal = Other;
2140 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2141 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2142
2143 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2144 CCVal, Cmp);
2145 }
2146
2147 return Op;
2148}
2149
2150static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2151 EVT VT = Op.getValueType();
2152
2153 // Let legalize expand this if it isn't a legal type yet.
2154 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2155 return SDValue();
2156
2157 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2158
2159 unsigned Opc;
2160 bool ExtraOp = false;
2161 switch (Op.getOpcode()) {
2162 default:
2163    llvm_unreachable("Invalid code");
2164 case ISD::ADDC:
2165 Opc = AArch64ISD::ADDS;
2166 break;
2167 case ISD::SUBC:
2168 Opc = AArch64ISD::SUBS;
2169 break;
2170 case ISD::ADDE:
2171 Opc = AArch64ISD::ADCS;
2172 ExtraOp = true;
2173 break;
2174 case ISD::SUBE:
2175 Opc = AArch64ISD::SBCS;
2176 ExtraOp = true;
2177 break;
2178 }
2179
2180 if (!ExtraOp)
2181 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2182 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2183 Op.getOperand(2));
2184}
2185
2186static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2187 // Let legalize expand this if it isn't a legal type yet.
2188 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2189 return SDValue();
2190
2191 SDLoc dl(Op);
2192 AArch64CC::CondCode CC;
2193 // The actual operation that sets the overflow or carry flag.
2194 SDValue Value, Overflow;
2195 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2196
2197 // We use 0 and 1 as false and true values.
2198 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2199 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2200
2201 // We use an inverted condition, because the conditional select is inverted
2202 // too. This will allow it to be selected to a single instruction:
2203 // CSINC Wd, WZR, WZR, invert(cond).
2204 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2205 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2206 CCVal, Overflow);
2207
2208 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2209 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2210}
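// Editorial sketch of the expected selection (register names are assumptions,
// not taken from this file): for a 32-bit unsigned add-with-overflow the pair
// produced above would typically become
//   adds  w0, w0, w1           // the value plus the NZCV flags (CC = HS)
//   csinc w8, wzr, wzr, lo     // overflow bit: 1 when carry is set, else 0
// i.e. the inverted-condition CSEL folds into the single CSINC/CSET that the
// comment above describes.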
2211
2212// Prefetch operands are:
2213// 1: Address to prefetch
2214// 2: bool isWrite
2215// 3: int locality (0 = no locality ... 3 = extreme locality)
2216// 4: bool isDataCache
2217static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2218 SDLoc DL(Op);
2219 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2220 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2221 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2222
2223 bool IsStream = !Locality;
2224 // When the locality number is set
2225 if (Locality) {
2226 // The front-end should have filtered out the out-of-range values
2227    assert(Locality <= 3 && "Prefetch locality out-of-range");
2228 // The locality degree is the opposite of the cache speed.
2229 // Put the number the other way around.
2230 // The encoding starts at 0 for level 1
2231 Locality = 3 - Locality;
2232 }
2233
2234  // Build the mask value encoding the expected behavior.
2235 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2236 (!IsData << 3) | // IsDataCache bit
2237 (Locality << 1) | // Cache level bits
2238 (unsigned)IsStream; // Stream bit
2239 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2240 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2241}
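// Worked encoding example for the PrfOp computed above (editorial sketch, not
// part of the original source): a read, data-cache prefetch with locality 3
// gives IsWrite = 0, IsData = 1, Locality = 3 - 3 = 0, IsStream = 0, so
//   PrfOp = (0 << 4) | (0 << 3) | (0 << 1) | 0 = 0
// which corresponds to PLDL1KEEP; locality 0 instead sets the stream bit,
// giving PrfOp = 1 (PLDL1STRM).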
2242
2243SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2244 SelectionDAG &DAG) const {
2245  assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2246
2247 RTLIB::Libcall LC;
2248 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2249
2250 return LowerF128Call(Op, DAG, LC);
2251}
2252
2253SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2254 SelectionDAG &DAG) const {
2255 if (Op.getOperand(0).getValueType() != MVT::f128) {
2256 // It's legal except when f128 is involved
2257 return Op;
2258 }
2259
2260 RTLIB::Libcall LC;
2261 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2262
2263 // FP_ROUND node has a second operand indicating whether it is known to be
2264 // precise. That doesn't take part in the LibCall so we can't directly use
2265 // LowerF128Call.
2266 SDValue SrcVal = Op.getOperand(0);
2267 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2268 SDLoc(Op)).first;
2269}
2270
2271static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2272 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2273 // Any additional optimization in this function should be recorded
2274 // in the cost tables.
2275 EVT InVT = Op.getOperand(0).getValueType();
2276 EVT VT = Op.getValueType();
2277 unsigned NumElts = InVT.getVectorNumElements();
2278
2279 // f16 vectors are promoted to f32 before a conversion.
2280 if (InVT.getVectorElementType() == MVT::f16) {
2281 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2282 SDLoc dl(Op);
2283 return DAG.getNode(
2284 Op.getOpcode(), dl, Op.getValueType(),
2285 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2286 }
2287
2288 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2289 SDLoc dl(Op);
2290 SDValue Cv =
2291 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2292 Op.getOperand(0));
2293 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2294 }
2295
2296 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2297 SDLoc dl(Op);
2298 MVT ExtVT =
2299 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2300 VT.getVectorNumElements());
2301 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2302 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2303 }
2304
2305 // Type changing conversions are illegal.
2306 return Op;
2307}
2308
2309SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2310 SelectionDAG &DAG) const {
2311 if (Op.getOperand(0).getValueType().isVector())
2312 return LowerVectorFP_TO_INT(Op, DAG);
2313
2314 // f16 conversions are promoted to f32 when full fp16 is not supported.
2315 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2316 !Subtarget->hasFullFP16()) {
2317 SDLoc dl(Op);
2318 return DAG.getNode(
2319 Op.getOpcode(), dl, Op.getValueType(),
2320 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2321 }
2322
2323 if (Op.getOperand(0).getValueType() != MVT::f128) {
2324 // It's legal except when f128 is involved
2325 return Op;
2326 }
2327
2328 RTLIB::Libcall LC;
2329 if (Op.getOpcode() == ISD::FP_TO_SINT)
2330 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2331 else
2332 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2333
2334 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2335 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2336}
2337
2338static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2339 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2340 // Any additional optimization in this function should be recorded
2341 // in the cost tables.
2342 EVT VT = Op.getValueType();
2343 SDLoc dl(Op);
2344 SDValue In = Op.getOperand(0);
2345 EVT InVT = In.getValueType();
2346
2347 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2348 MVT CastVT =
2349 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2350 InVT.getVectorNumElements());
2351 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2352 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2353 }
2354
2355 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2356 unsigned CastOpc =
2357 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2358 EVT CastVT = VT.changeVectorElementTypeToInteger();
2359 In = DAG.getNode(CastOpc, dl, CastVT, In);
2360 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2361 }
2362
2363 return Op;
2364}
2365
2366SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2367 SelectionDAG &DAG) const {
2368 if (Op.getValueType().isVector())
2369 return LowerVectorINT_TO_FP(Op, DAG);
2370
2371 // f16 conversions are promoted to f32 when full fp16 is not supported.
2372 if (Op.getValueType() == MVT::f16 &&
2373 !Subtarget->hasFullFP16()) {
2374 SDLoc dl(Op);
2375 return DAG.getNode(
2376 ISD::FP_ROUND, dl, MVT::f16,
2377 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2378 DAG.getIntPtrConstant(0, dl));
2379 }
2380
2381 // i128 conversions are libcalls.
2382 if (Op.getOperand(0).getValueType() == MVT::i128)
2383 return SDValue();
2384
2385 // Other conversions are legal, unless it's to the completely software-based
2386 // fp128.
2387 if (Op.getValueType() != MVT::f128)
2388 return Op;
2389
2390 RTLIB::Libcall LC;
2391 if (Op.getOpcode() == ISD::SINT_TO_FP)
2392 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2393 else
2394 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2395
2396 return LowerF128Call(Op, DAG, LC);
2397}
2398
2399SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2400 SelectionDAG &DAG) const {
2401 // For iOS, we want to call an alternative entry point: __sincos_stret,
2402 // which returns the values in two S / D registers.
2403 SDLoc dl(Op);
2404 SDValue Arg = Op.getOperand(0);
2405 EVT ArgVT = Arg.getValueType();
2406 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2407
2408 ArgListTy Args;
2409 ArgListEntry Entry;
2410
2411 Entry.Node = Arg;
2412 Entry.Ty = ArgTy;
2413 Entry.IsSExt = false;
2414 Entry.IsZExt = false;
2415 Args.push_back(Entry);
2416
2417 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2418 : RTLIB::SINCOS_STRET_F32;
2419 const char *LibcallName = getLibcallName(LC);
2420 SDValue Callee =
2421 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2422
2423 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2424 TargetLowering::CallLoweringInfo CLI(DAG);
2425 CLI.setDebugLoc(dl)
2426 .setChain(DAG.getEntryNode())
2427 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2428
2429 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2430 return CallResult.first;
2431}
2432
2433static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2434 if (Op.getValueType() != MVT::f16)
2435 return SDValue();
2436
2437  assert(Op.getOperand(0).getValueType() == MVT::i16);
2438 SDLoc DL(Op);
2439
2440 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2441 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2442 return SDValue(
2443 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2444 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2445 0);
2446}
2447
2448static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2449 if (OrigVT.getSizeInBits() >= 64)
2450 return OrigVT;
2451
2452  assert(OrigVT.isSimple() && "Expecting a simple value type");
2453
2454 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2455 switch (OrigSimpleTy) {
2456  default: llvm_unreachable("Unexpected Vector Type");
2457 case MVT::v2i8:
2458 case MVT::v2i16:
2459 return MVT::v2i32;
2460 case MVT::v4i8:
2461 return MVT::v4i16;
2462 }
2463}
2464
2465static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2466 const EVT &OrigTy,
2467 const EVT &ExtTy,
2468 unsigned ExtOpcode) {
2469 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2470 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2471 // 64-bits we need to insert a new extension so that it will be 64-bits.
2472  assert(ExtTy.is128BitVector() && "Unexpected extension size");
2473 if (OrigTy.getSizeInBits() >= 64)
2474 return N;
2475
2476 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2477 EVT NewVT = getExtensionTo64Bits(OrigTy);
2478
2479 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2480}
2481
2482static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2483 bool isSigned) {
2484 EVT VT = N->getValueType(0);
2485
2486 if (N->getOpcode() != ISD::BUILD_VECTOR)
2487 return false;
2488
2489 for (const SDValue &Elt : N->op_values()) {
2490 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2491 unsigned EltSize = VT.getScalarSizeInBits();
2492 unsigned HalfSize = EltSize / 2;
2493 if (isSigned) {
2494 if (!isIntN(HalfSize, C->getSExtValue()))
2495 return false;
2496 } else {
2497 if (!isUIntN(HalfSize, C->getZExtValue()))
2498 return false;
2499 }
2500 continue;
2501 }
2502 return false;
2503 }
2504
2505 return true;
2506}
2507
2508static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2509 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2510 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2511 N->getOperand(0)->getValueType(0),
2512 N->getValueType(0),
2513 N->getOpcode());
2514
2515 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2516 EVT VT = N->getValueType(0);
2517 SDLoc dl(N);
2518 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2519 unsigned NumElts = VT.getVectorNumElements();
2520 MVT TruncVT = MVT::getIntegerVT(EltSize);
2521 SmallVector<SDValue, 8> Ops;
2522 for (unsigned i = 0; i != NumElts; ++i) {
2523 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2524 const APInt &CInt = C->getAPIntValue();
2525 // Element types smaller than 32 bits are not legal, so use i32 elements.
2526 // The values are implicitly truncated so sext vs. zext doesn't matter.
2527 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2528 }
2529 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2530}
2531
2532static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2533 return N->getOpcode() == ISD::SIGN_EXTEND ||
2534 isExtendedBUILD_VECTOR(N, DAG, true);
2535}
2536
2537static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2538 return N->getOpcode() == ISD::ZERO_EXTEND ||
2539 isExtendedBUILD_VECTOR(N, DAG, false);
2540}
2541
2542static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2543 unsigned Opcode = N->getOpcode();
2544 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2545 SDNode *N0 = N->getOperand(0).getNode();
2546 SDNode *N1 = N->getOperand(1).getNode();
2547 return N0->hasOneUse() && N1->hasOneUse() &&
2548 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2549 }
2550 return false;
2551}
2552
2553static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2554 unsigned Opcode = N->getOpcode();
2555 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2556 SDNode *N0 = N->getOperand(0).getNode();
2557 SDNode *N1 = N->getOperand(1).getNode();
2558 return N0->hasOneUse() && N1->hasOneUse() &&
2559 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2560 }
2561 return false;
2562}
2563
2564SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2565 SelectionDAG &DAG) const {
2566 // The rounding mode is in bits 23:22 of the FPCR.
2567 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
2568 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3),
2569 // so that the shift and the AND get folded into a bitfield extract.
2570 SDLoc dl(Op);
2571
2572 SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
2573 DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
2574 MVT::i64));
2575 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2576 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2577 DAG.getConstant(1U << 22, dl, MVT::i32));
2578 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2579 DAG.getConstant(22, dl, MVT::i32));
2580 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2581 DAG.getConstant(3, dl, MVT::i32));
2582}
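The add/shift/mask in the comment above implements the 0->1, 1->2, 2->3, 3->0 mapping. A small standalone sketch checking that mapping (the FPCR words are synthetic; only bits 23:22 matter):

    #include <cassert>
    #include <cstdint>

    // Same (((FPCR + (1 << 22)) >> 22) & 3) computation as the DAG nodes above.
    static uint32_t fltRoundsFromFPCR(uint32_t FPCR) {
      return ((FPCR + (1u << 22)) >> 22) & 3;
    }

    int main() {
      for (uint32_t RMode = 0; RMode < 4; ++RMode) {
        uint32_t FPCR = RMode << 22;                          // rounding mode in bits 23:22
        assert(fltRoundsFromFPCR(FPCR) == ((RMode + 1) & 3)); // 0->1, 1->2, 2->3, 3->0
      }
      return 0;
    }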
2583
2584static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2585 // Multiplications are only custom-lowered for 128-bit vectors so that
2586 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2587 EVT VT = Op.getValueType();
2588 assert(VT.is128BitVector() && VT.isInteger() &&
2589        "unexpected type for custom-lowering ISD::MUL");
2590 SDNode *N0 = Op.getOperand(0).getNode();
2591 SDNode *N1 = Op.getOperand(1).getNode();
2592 unsigned NewOpc = 0;
2593 bool isMLA = false;
2594 bool isN0SExt = isSignExtended(N0, DAG);
2595 bool isN1SExt = isSignExtended(N1, DAG);
2596 if (isN0SExt && isN1SExt)
2597 NewOpc = AArch64ISD::SMULL;
2598 else {
2599 bool isN0ZExt = isZeroExtended(N0, DAG);
2600 bool isN1ZExt = isZeroExtended(N1, DAG);
2601 if (isN0ZExt && isN1ZExt)
2602 NewOpc = AArch64ISD::UMULL;
2603 else if (isN1SExt || isN1ZExt) {
2604 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2605 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2606 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2607 NewOpc = AArch64ISD::SMULL;
2608 isMLA = true;
2609 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2610 NewOpc = AArch64ISD::UMULL;
2611 isMLA = true;
2612 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2613 std::swap(N0, N1);
2614 NewOpc = AArch64ISD::UMULL;
2615 isMLA = true;
2616 }
2617 }
2618
2619 if (!NewOpc) {
2620 if (VT == MVT::v2i64)
2621 // Fall through to expand this. It is not legal.
2622 return SDValue();
2623 else
2624 // Other vector multiplications are legal.
2625 return Op;
2626 }
2627 }
2628
2629 // Legalize to a S/UMULL instruction
2630 SDLoc DL(Op);
2631 SDValue Op0;
2632 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2633 if (!isMLA) {
2634 Op0 = skipExtensionForVectorMULL(N0, DAG);
2635 assert(Op0.getValueType().is64BitVector() &&
2636        Op1.getValueType().is64BitVector() &&
2637        "unexpected types for extended operands to VMULL");
2638 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2639 }
2640 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2641 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
2642 // This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
2643 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2644 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2645 EVT Op1VT = Op1.getValueType();
2646 return DAG.getNode(N0->getOpcode(), DL, VT,
2647 DAG.getNode(NewOpc, DL, VT,
2648 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2649 DAG.getNode(NewOpc, DL, VT,
2650 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2651}
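The isMLA path relies on distributivity in the widened type: (sext A + sext B) * sext C equals (sext A * sext C) + (sext B * sext C), which is what lets one wide multiply become two SMULLs feeding an add. A scalar sketch with arbitrary i16 inputs:

    #include <cassert>
    #include <cstdint>

    int main() {
      int16_t A = 1234, B = -4321, C = 777;
      // Widening to i32 mirrors what SMULL does per lane.
      int32_t Folded = (int32_t(A) + int32_t(B)) * int32_t(C);
      int32_t Split  = int32_t(A) * int32_t(C) + int32_t(B) * int32_t(C);
      assert(Folded == Split);  // distributivity holds in the widened type
      return 0;
    }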
2652
2653// Lower vector multiply high (ISD::MULHS and ISD::MULHU).
2654static SDValue LowerMULH(SDValue Op, SelectionDAG &DAG) {
2655 // Multiplications are only custom-lowered for 128-bit vectors so that
2656 // {S,U}MULL{2} can be detected. Otherwise v2i64 multiplications are not
2657 // legal.
2658 EVT VT = Op.getValueType();
2659 assert(VT.is128BitVector() && VT.isInteger() &&
2660        "unexpected type for custom-lowering ISD::MULH{U,S}");
2661
2662 SDValue V0 = Op.getOperand(0);
2663 SDValue V1 = Op.getOperand(1);
2664
2665 SDLoc DL(Op);
2666
2667 EVT ExtractVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
2668
2669 // We turn (V0 mulhs/mulhu V1) to:
2670 //
2671 // (uzp2 (smull (extract_subvector (ExtractVT V128:V0, (i64 0)),
2672 //          (extract_subvector (ExtractVT V128:V1, (i64 0))))),
2673 //       (smull (extract_subvector (ExtractVT V128:V0, (i64 VMull2Idx)),
2674 //          (extract_subvector (ExtractVT V128:V1, (i64 VMull2Idx))))))
2675 //
2676 // where ExtractVT is a subvector with half the number of elements, and
2677 // VMull2Idx is the index of the middle element (the start of the high half).
2678 //
2679 // The high-part extracts and multiplies will be matched against
2680 // {S,U}MULL{v16i8_v8i16,v8i16_v4i32,v4i32_v2i64}, which in turn will
2681 // issue a {s,u}mull2 instruction.
2682 //
2683 // In short: multiply the lower subvector with '{s,u}mull', the high
2684 // subvector with '{s,u}mull2', and shuffle the high halves of both
2685 // results into the final vector.
2686 unsigned Mull2VectorIdx = VT.getVectorNumElements() / 2;
2687 SDValue VMullIdx = DAG.getConstant(0, DL, MVT::i64);
2688 SDValue VMull2Idx = DAG.getConstant(Mull2VectorIdx, DL, MVT::i64);
2689
2690 SDValue VMullV0 =
2691 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMullIdx);
2692 SDValue VMullV1 =
2693 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMullIdx);
2694
2695 SDValue VMull2V0 =
2696 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMull2Idx);
2697 SDValue VMull2V1 =
2698 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMull2Idx);
2699
2700 unsigned MullOpc = Op.getOpcode() == ISD::MULHS ? AArch64ISD::SMULL
2701 : AArch64ISD::UMULL;
2702
2703 EVT MullVT = ExtractVT.widenIntegerVectorElementType(*DAG.getContext());
2704 SDValue Mull = DAG.getNode(MullOpc, DL, MullVT, VMullV0, VMullV1);
2705 SDValue Mull2 = DAG.getNode(MullOpc, DL, MullVT, VMull2V0, VMull2V1);
2706
2707 Mull = DAG.getNode(ISD::BITCAST, DL, VT, Mull);
2708 Mull2 = DAG.getNode(ISD::BITCAST, DL, VT, Mull2);
2709
2710 return DAG.getNode(AArch64ISD::UZP2, DL, VT, Mull, Mull2);
2711}
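Per lane, MULHS is "widen, multiply, keep the high half", which is why SMULL/SMULL2 followed by UZP2 (which keeps the odd, i.e. high, halves on a little-endian lane layout) computes it. A scalar i16 model with illustrative values:

    #include <cassert>
    #include <cstdint>

    static int16_t mulhs16(int16_t A, int16_t B) {
      int32_t Full = int32_t(A) * int32_t(B);   // what smull produces per lane
      return int16_t(uint32_t(Full) >> 16);     // high half, what uzp2 selects
    }

    int main() {
      assert(mulhs16(0x4000, 0x4000) == 0x1000); // 2^14 * 2^14 = 2^28, high half 0x1000
      assert(mulhs16(-1, 1) == -1);              // high half of -1 is still all-ones
      return 0;
    }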
2712
2713SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2714 SelectionDAG &DAG) const {
2715 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2716 SDLoc dl(Op);
2717 switch (IntNo) {
2718 default: return SDValue(); // Don't custom lower most intrinsics.
2719 case Intrinsic::thread_pointer: {
2720 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2721 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2722 }
2723 case Intrinsic::aarch64_neon_abs:
2724 return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
2725 Op.getOperand(1));
2726 case Intrinsic::aarch64_neon_smax:
2727 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2728 Op.getOperand(1), Op.getOperand(2));
2729 case Intrinsic::aarch64_neon_umax:
2730 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2731 Op.getOperand(1), Op.getOperand(2));
2732 case Intrinsic::aarch64_neon_smin:
2733 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2734 Op.getOperand(1), Op.getOperand(2));
2735 case Intrinsic::aarch64_neon_umin:
2736 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2737 Op.getOperand(1), Op.getOperand(2));
2738 }
2739}
2740
2741// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
2742static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
2743 EVT VT, EVT MemVT,
2744 SelectionDAG &DAG) {
2745 assert(VT.isVector() && "VT should be a vector type");
2746 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
2747
2748 SDValue Value = ST->getValue();
2749
2750 // It first extends the promoted v4i16 to v8i16, truncates it to v8i8, and
2751 // extracts the word lane which represents the v4i8 subvector. This optimizes
2752 // the store to:
2753 //
2754 // xtn v0.8b, v0.8h
2755 // str s0, [x0]
2756
2757 SDValue Undef = DAG.getUNDEF(MVT::i16);
2758 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
2759 {Undef, Undef, Undef, Undef});
2760
2761 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
2762 Value, UndefVec);
2763 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
2764
2765 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
2766 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
2767 Trunc, DAG.getConstant(0, DL, MVT::i64));
2768
2769 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
2770 ST->getBasePtr(), ST->getMemOperand());
2771}
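A byte-level model of that truncating store, matching the "xtn v0.8b, v0.8h; str s0, [x0]" sequence in the comment: keep the low byte of each lane and store the four bytes as one 32-bit word (lane values are made up; little-endian lane order assumed):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint16_t Lanes[4] = {0x1234, 0xABCD, 0x00FF, 0x7F00};
      uint8_t Narrowed[4];
      for (int i = 0; i < 4; ++i)
        Narrowed[i] = uint8_t(Lanes[i]);        // xtn: narrow each lane to 8 bits

      uint32_t Word;                            // str s0: a single 32-bit store
      std::memcpy(&Word, Narrowed, sizeof(Word));

      uint8_t Mem[4];
      std::memcpy(Mem, &Word, sizeof(Word));
      assert(Mem[0] == 0x34 && Mem[1] == 0xCD && Mem[2] == 0xFF && Mem[3] == 0x00);
      return 0;
    }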
2772
2773 // Custom lowering for any store, vector or scalar, truncating or not.
2774 // Currently we only custom lower the truncating store from the vector type
2775 // v4i16 to v4i8.
2776SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
2777 SelectionDAG &DAG) const {
2778 SDLoc Dl(Op);
2779 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
2780 assert(StoreNode && "Can only custom lower store nodes");
2781
2782 SDValue Value = StoreNode->getValue();
2783
2784 EVT VT = Value.getValueType();
2785 EVT MemVT = StoreNode->getMemoryVT();
2786
2787 assert(VT.isVector() && "Can only custom lower vector store types");
2788
2789 unsigned AS = StoreNode->getAddressSpace();
2790 unsigned Align = StoreNode->getAlignment();
2791 if (Align < MemVT.getStoreSize() &&
2792 !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
2793 return scalarizeVectorStore(StoreNode, DAG);
2794 }
2795
2796 if (StoreNode->isTruncatingStore()) {
2797 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
2798 }
2799
2800 return SDValue();
2801}
2802
2803SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2804 SelectionDAG &DAG) const {
2805 LLVM_DEBUG(dbgs() << "Custom lowering: ");
2806 LLVM_DEBUG(Op.dump());
2807
2808 switch (Op.getOpcode()) {
2809 default:
2810 llvm_unreachable("unimplemented operand");
2811 return SDValue();
2812 case ISD::BITCAST:
2813 return LowerBITCAST(Op, DAG);
2814 case ISD::GlobalAddress:
2815 return LowerGlobalAddress(Op, DAG);
2816 case ISD::GlobalTLSAddress:
2817 return LowerGlobalTLSAddress(Op, DAG);
2818 case ISD::SETCC:
2819 return LowerSETCC(Op, DAG);
2820 case ISD::BR_CC:
2821 return LowerBR_CC(Op, DAG);
2822 case ISD::SELECT:
2823 return LowerSELECT(Op, DAG);
2824 case ISD::SELECT_CC:
2825 return LowerSELECT_CC(Op, DAG);
2826 case ISD::JumpTable:
2827 return LowerJumpTable(Op, DAG);
2828 case ISD::BR_JT:
2829 return LowerBR_JT(Op, DAG);
2830 case ISD::ConstantPool:
2831 return LowerConstantPool(Op, DAG);
2832 case ISD::BlockAddress:
2833 return LowerBlockAddress(Op, DAG);
2834 case ISD::VASTART:
2835 return LowerVASTART(Op, DAG);
2836 case ISD::VACOPY:
2837 return LowerVACOPY(Op, DAG);
2838 case ISD::VAARG:
2839 return LowerVAARG(Op, DAG);
2840 case ISD::ADDC:
2841 case ISD::ADDE:
2842 case ISD::SUBC:
2843 case ISD::SUBE:
2844 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2845 case ISD::SADDO:
2846 case ISD::UADDO:
2847 case ISD::SSUBO:
2848 case ISD::USUBO:
2849 case ISD::SMULO:
2850 case ISD::UMULO:
2851 return LowerXALUO(Op, DAG);
2852 case ISD::FADD:
2853 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2854 case ISD::FSUB:
2855 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2856 case ISD::FMUL:
2857 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2858 case ISD::FDIV:
2859 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2860 case ISD::FP_ROUND:
2861 return LowerFP_ROUND(Op, DAG);
2862 case ISD::FP_EXTEND:
2863 return LowerFP_EXTEND(Op, DAG);
2864 case ISD::FRAMEADDR:
2865 return LowerFRAMEADDR(Op, DAG);
2866 case ISD::RETURNADDR:
2867 return LowerRETURNADDR(Op, DAG);
2868 case ISD::INSERT_VECTOR_ELT:
2869 return LowerINSERT_VECTOR_ELT(Op, DAG);
2870 case ISD::EXTRACT_VECTOR_ELT:
2871 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2872 case ISD::BUILD_VECTOR:
2873 return LowerBUILD_VECTOR(Op, DAG);
2874 case ISD::VECTOR_SHUFFLE:
2875 return LowerVECTOR_SHUFFLE(Op, DAG);
2876 case ISD::EXTRACT_SUBVECTOR:
2877 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2878 case ISD::SRA:
2879 case ISD::SRL:
2880 case ISD::SHL:
2881 return LowerVectorSRA_SRL_SHL(Op, DAG);
2882 case ISD::SHL_PARTS:
2883 return LowerShiftLeftParts(Op, DAG);
2884 case ISD::SRL_PARTS:
2885 case ISD::SRA_PARTS:
2886 return LowerShiftRightParts(Op, DAG);
2887 case ISD::CTPOP:
2888 return LowerCTPOP(Op, DAG);
2889 case ISD::FCOPYSIGN:
2890 return LowerFCOPYSIGN(Op, DAG);
2891 case ISD::AND:
2892 return LowerVectorAND(Op, DAG);
2893 case ISD::OR:
2894 return LowerVectorOR(Op, DAG);
2895 case ISD::XOR:
2896 return LowerXOR(Op, DAG);
2897 case ISD::PREFETCH:
2898 return LowerPREFETCH(Op, DAG);
2899 case ISD::SINT_TO_FP:
2900 case ISD::UINT_TO_FP:
2901 return LowerINT_TO_FP(Op, DAG);
2902 case ISD::FP_TO_SINT:
2903 case ISD::FP_TO_UINT:
2904 return LowerFP_TO_INT(Op, DAG);
2905 case ISD::FSINCOS:
2906 return LowerFSINCOS(Op, DAG);
2907 case ISD::FLT_ROUNDS_:
2908 return LowerFLT_ROUNDS_(Op, DAG);
2909 case ISD::MUL:
2910 return LowerMUL(Op, DAG);
2911 case ISD::MULHS:
2912 case ISD::MULHU:
2913 return LowerMULH(Op, DAG);
2914 case ISD::INTRINSIC_WO_CHAIN:
2915 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2916 case ISD::STORE:
2917 return LowerSTORE(Op, DAG);
2918 case ISD::VECREDUCE_ADD:
2919 case ISD::VECREDUCE_SMAX:
2920 case ISD::VECREDUCE_SMIN:
2921 case ISD::VECREDUCE_UMAX:
2922 case ISD::VECREDUCE_UMIN:
2923 case ISD::VECREDUCE_FMAX:
2924 case ISD::VECREDUCE_FMIN:
2925 return LowerVECREDUCE(Op, DAG);
2926 case ISD::ATOMIC_LOAD_SUB:
2927 return LowerATOMIC_LOAD_SUB(Op, DAG);
2928 case ISD::ATOMIC_LOAD_AND:
2929 return LowerATOMIC_LOAD_AND(Op, DAG);
2930 case ISD::DYNAMIC_STACKALLOC:
2931 return LowerDYNAMIC_STACKALLOC(Op, DAG);
2932 }
2933}
2934
2935//===----------------------------------------------------------------------===//
2936// Calling Convention Implementation
2937//===----------------------------------------------------------------------===//
2938
2939#include "AArch64GenCallingConv.inc"
2940
2941/// Selects the correct CCAssignFn for a given CallingConvention value.
2942CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2943 bool IsVarArg) const {
2944 switch (CC) {
2945 default:
2946 report_fatal_error("Unsupported calling convention.");
2947 case CallingConv::WebKit_JS:
2948 return CC_AArch64_WebKit_JS;
2949 case CallingConv::GHC:
2950 return CC_AArch64_GHC;
2951 case CallingConv::C:
2952 case CallingConv::Fast:
2953 case CallingConv::PreserveMost:
2954 case CallingConv::CXX_FAST_TLS:
2955 case CallingConv::Swift:
2956 if (Subtarget->isTargetWindows() && IsVarArg)
2957 return CC_AArch64_Win64_VarArg;
2958 if (!Subtarget->isTargetDarwin())
2959 return CC_AArch64_AAPCS;
2960 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2961 case CallingConv::Win64:
2962 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
2963 case CallingConv::AArch64_VectorCall:
2964 return CC_AArch64_AAPCS;
2965 }
2966}
2967
2968CCAssignFn *
2969AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2970 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2971 : RetCC_AArch64_AAPCS;
2972}
2973
2974SDValue AArch64TargetLowering::LowerFormalArguments(
2975 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2976 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2977 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2978 MachineFunction &MF = DAG.getMachineFunction();
2979 MachineFrameInfo &MFI = MF.getFrameInfo();
2980 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
2981
2982 // Assign locations to all of the incoming arguments.
2983 SmallVector<CCValAssign, 16> ArgLocs;
2984 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2985 *DAG.getContext());
2986
2987 // At this point, Ins[].VT may already be promoted to i32. To correctly
2988 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2989 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2990 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2991 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2992 // LocVT.
2993 unsigned NumArgs = Ins.size();
2994 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
2995 unsigned CurArgIdx = 0;
2996 for (unsigned i = 0; i != NumArgs; ++i) {
2997 MVT ValVT = Ins[i].VT;
2998 if (Ins[i].isOrigArg()) {
2999 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
3000 CurArgIdx = Ins[i].getOrigArgIndex();
3001
3002 // Get type of the original argument.
3003 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3004 /*AllowUnknown*/ true);
3005 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3006 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3007 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3008 ValVT = MVT::i8;
3009 else if (ActualMVT == MVT::i16)
3010 ValVT = MVT::i16;
3011 }
3012 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3013 bool Res =
3014 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3015 assert(!Res && "Call operand has unhandled type");
3016 (void)Res;
3017 }
3018 assert(ArgLocs.size() == Ins.size());
3019 SmallVector<SDValue, 16> ArgValues;
3020 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3021 CCValAssign &VA = ArgLocs[i];
3022
3023 if (Ins[i].Flags.isByVal()) {
3024 // Byval is used for HFAs in the PCS, but the system should work in a
3025 // non-compliant manner for larger structs.
3026 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3027 int Size = Ins[i].Flags.getByValSize();
3028 unsigned NumRegs = (Size + 7) / 8;
3029
3030 // FIXME: This works on big-endian for composite byvals, which are the common
3031 // case. It should work for fundamental types too.
3032 unsigned FrameIdx =
3033 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3034 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3035 InVals.push_back(FrameIdxN);
3036
3037 continue;
3038 }
3039
3040 if (VA.isRegLoc()) {
3041 // Arguments stored in registers.
3042 EVT RegVT = VA.getLocVT();
3043
3044 SDValue ArgValue;
3045 const TargetRegisterClass *RC;
3046
3047 if (RegVT == MVT::i32)
3048 RC = &AArch64::GPR32RegClass;
3049 else if (RegVT == MVT::i64)
3050 RC = &AArch64::GPR64RegClass;
3051 else if (RegVT == MVT::f16)
3052 RC = &AArch64::FPR16RegClass;
3053 else if (RegVT == MVT::f32)
3054 RC = &AArch64::FPR32RegClass;
3055 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3056 RC = &AArch64::FPR64RegClass;
3057 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3058 RC = &AArch64::FPR128RegClass;
3059 else
3060 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3061
3062 // Transform the arguments in physical registers into virtual ones.
3063 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3064 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3065
3066 // If this is an 8, 16 or 32-bit value, it is really passed promoted
3067 // to 64 bits. Insert an assert[sz]ext to capture this, then
3068 // truncate to the right size.
3069 switch (VA.getLocInfo()) {
3070 default:
3071 llvm_unreachable("Unknown loc info!");
3072 case CCValAssign::Full:
3073 break;
3074 case CCValAssign::BCvt:
3075 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3076 break;
3077 case CCValAssign::AExt:
3078 case CCValAssign::SExt:
3079 case CCValAssign::ZExt:
3080 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
3081 // nodes after our lowering.
3082 assert(RegVT == Ins[i].VT && "incorrect register location selected");
3083 break;
3084 }
3085
3086 InVals.push_back(ArgValue);
3087
3088 } else { // VA.isRegLoc()
3089 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3090 unsigned ArgOffset = VA.getLocMemOffset();
3091 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
3092
3093 uint32_t BEAlign = 0;
3094 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3095 !Ins[i].Flags.isInConsecutiveRegs())
3096 BEAlign = 8 - ArgSize;
3097
3098 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3099
3100 // Create load nodes to retrieve arguments from the stack.
3101 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3102 SDValue ArgValue;
3103
3104 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
3105 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3106 MVT MemVT = VA.getValVT();
3107
3108 switch (VA.getLocInfo()) {
3109 default:
3110 break;
3111 case CCValAssign::BCvt:
3112 MemVT = VA.getLocVT();
3113 break;
3114 case CCValAssign::SExt:
3115 ExtType = ISD::SEXTLOAD;
3116 break;
3117 case CCValAssign::ZExt:
3118 ExtType = ISD::ZEXTLOAD;
3119 break;
3120 case CCValAssign::AExt:
3121 ExtType = ISD::EXTLOAD;
3122 break;
3123 }
3124
3125 ArgValue = DAG.getExtLoad(
3126 ExtType, DL, VA.getLocVT(), Chain, FIN,
3127 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3128 MemVT);
3129
3130 InVals.push_back(ArgValue);
3131 }
3132 }
3133
3134 // varargs
3135 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3136 if (isVarArg) {
3137 if (!Subtarget->isTargetDarwin() || IsWin64) {
3138 // The AAPCS variadic function ABI is identical to the non-variadic
3139 // one. As a result there may be more arguments in registers and we should
3140 // save them for future reference.
3141 // Win64 variadic functions also pass arguments in registers, but all float
3142 // arguments are passed in integer registers.
3143 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3144 }
3145
3146 // This will point to the next argument passed via stack.
3147 unsigned StackOffset = CCInfo.getNextStackOffset();
3148 // We currently pass all varargs at 8-byte alignment.
3149 StackOffset = ((StackOffset + 7) & ~7);
3150 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3151 }
3152
3153 unsigned StackArgSize = CCInfo.getNextStackOffset();
3154 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3155 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3156 // This is a non-standard ABI so by fiat I say we're allowed to make full
3157 // use of the stack area to be popped, which must be aligned to 16 bytes in
3158 // any case:
3159 StackArgSize = alignTo(StackArgSize, 16);
3160
3161 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3162 // a multiple of 16.
3163 FuncInfo->setArgumentStackToRestore(StackArgSize);
3164
3165 // This realignment carries over to the available bytes below. Our own
3166 // callers will guarantee the space is free by giving an aligned value to
3167 // CALLSEQ_START.
3168 }
3169 // Even if we're not expected to free up the space, it's useful to know how
3170 // much is there while considering tail calls (because we can reuse it).
3171 FuncInfo->setBytesInStackArgArea(StackArgSize);
3172
3173 if (Subtarget->hasCustomCallingConv())
3174 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3175
3176 return Chain;
3177}
3178
3179void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3180 SelectionDAG &DAG,
3181 const SDLoc &DL,
3182 SDValue &Chain) const {
3183 MachineFunction &MF = DAG.getMachineFunction();
3184 MachineFrameInfo &MFI = MF.getFrameInfo();
3185 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3186 auto PtrVT = getPointerTy(DAG.getDataLayout());
3187 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3188
3189 SmallVector<SDValue, 8> MemOps;
3190
3191 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3192 AArch64::X3, AArch64::X4, AArch64::X5,
3193 AArch64::X6, AArch64::X7 };
3194 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3195 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3196
3197 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3198 int GPRIdx = 0;
3199 if (GPRSaveSize != 0) {
3200 if (IsWin64) {
3201 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3202 if (GPRSaveSize & 15)
3203 // The extra size here, if triggered, will always be 8.
3204 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3205 } else
3206 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3207
3208 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3209
3210 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3211 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3212 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3213 SDValue Store = DAG.getStore(
3214 Val.getValue(1), DL, Val, FIN,
3215 IsWin64
3216 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3217 GPRIdx,
3218 (i - FirstVariadicGPR) * 8)
3219 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3220 MemOps.push_back(Store);
3221 FIN =
3222 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3223 }
3224 }
3225 FuncInfo->setVarArgsGPRIndex(GPRIdx);
3226 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3227
3228 if (Subtarget->hasFPARMv8() && !IsWin64) {
3229 static const MCPhysReg FPRArgRegs[] = {
3230 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3231 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3232 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3233 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3234
3235 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3236 int FPRIdx = 0;
3237 if (FPRSaveSize != 0) {
3238 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3239
3240 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3241
3242 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3243 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3244 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3245
3246 SDValue Store = DAG.getStore(
3247 Val.getValue(1), DL, Val, FIN,
3248 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3249 MemOps.push_back(Store);
3250 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3251 DAG.getConstant(16, DL, PtrVT));
3252 }
3253 }
3254 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3255 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3256 }
3257
3258 if (!MemOps.empty()) {
3259 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3260 }
3261}
3262
3263/// LowerCallResult - Lower the result values of a call into the
3264/// appropriate copies out of appropriate physical registers.
3265SDValue AArch64TargetLowering::LowerCallResult(
3266 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3267 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3268 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3269 SDValue ThisVal) const {
3270 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3271 ? RetCC_AArch64_WebKit_JS
3272 : RetCC_AArch64_AAPCS;
3273 // Assign locations to each value returned by this call.
3274 SmallVector<CCValAssign, 16> RVLocs;
3275 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3276 *DAG.getContext());
3277 CCInfo.AnalyzeCallResult(Ins, RetCC);
3278
3279 // Copy all of the result registers out of their specified physreg.
3280 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3281 CCValAssign VA = RVLocs[i];
3282
3283 // Pass 'this' value directly from the argument to return value, to avoid
3284 // reg unit interference
3285 if (i == 0 && isThisReturn) {
3286 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3287        "unexpected return calling convention register assignment");
3288 InVals.push_back(ThisVal);
3289 continue;
3290 }
3291
3292 SDValue Val =
3293 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3294 Chain = Val.getValue(1);
3295 InFlag = Val.getValue(2);
3296
3297 switch (VA.getLocInfo()) {
3298 default:
3299 llvm_unreachable("Unknown loc info!");
3300 case CCValAssign::Full:
3301 break;
3302 case CCValAssign::BCvt:
3303 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3304 break;
3305 }
3306
3307 InVals.push_back(Val);
3308 }
3309
3310 return Chain;
3311}
3312
3313/// Return true if the calling convention is one that we can guarantee TCO for.
3314static bool canGuaranteeTCO(CallingConv::ID CC) {
3315 return CC == CallingConv::Fast;
3316}
3317
3318/// Return true if we might ever do TCO for calls with this calling convention.
3319static bool mayTailCallThisCC(CallingConv::ID CC) {
3320 switch (CC) {
3321 case CallingConv::C:
3322 case CallingConv::PreserveMost:
3323 case CallingConv::Swift:
3324 return true;
3325 default:
3326 return canGuaranteeTCO(CC);
3327 }
3328}
3329
3330bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3331 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3332 const SmallVectorImpl<ISD::OutputArg> &Outs,
3333 const SmallVectorImpl<SDValue> &OutVals,
3334 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3335 if (!mayTailCallThisCC(CalleeCC))
3336 return false;
3337
3338 MachineFunction &MF = DAG.getMachineFunction();
3339 const Function &CallerF = MF.getFunction();
3340 CallingConv::ID CallerCC = CallerF.getCallingConv();
3341 bool CCMatch = CallerCC == CalleeCC;
3342
3343 // Byval parameters hand the function a pointer directly into the stack area
3344 // we want to reuse during a tail call. Working around this *is* possible (see
3345 // X86) but less efficient and uglier in LowerCall.
3346 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3347 e = CallerF.arg_end();
3348 i != e; ++i)
3349 if (i->hasByValAttr())
3350 return false;
3351
3352 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3353 return canGuaranteeTCO(CalleeCC) && CCMatch;
3354
3355 // Externally-defined functions with weak linkage should not be
3356 // tail-called on AArch64 when the OS does not support dynamic
3357 // pre-emption of symbols, as the AAELF spec requires normal calls
3358 // to undefined weak functions to be replaced with a NOP or jump to the
3359 // next instruction. The behaviour of branch instructions in this
3360 // situation (as used for tail calls) is implementation-defined, so we
3361 // cannot rely on the linker replacing the tail call with a return.
3362 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3363 const GlobalValue *GV = G->getGlobal();
3364 const Triple &TT = getTargetMachine().getTargetTriple();
3365 if (GV->hasExternalWeakLinkage() &&
3366 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3367 return false;
3368 }
3369
3370 // Now we search for cases where we can use a tail call without changing the
3371 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3372 // concept.
3373
3374 // I want anyone implementing a new calling convention to think long and hard
3375 // about this assert.
3376 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3377        "Unexpected variadic calling convention");
3378
3379 LLVMContext &C = *DAG.getContext();
3380 if (isVarArg && !Outs.empty()) {
3381 // At least two cases here: if caller is fastcc then we can't have any
3382 // memory arguments (we'd be expected to clean up the stack afterwards). If
3383 // caller is C then we could potentially use its argument area.
3384
3385 // FIXME: for now we take the most conservative of these in both cases:
3386 // disallow all variadic memory operands.
3387 SmallVector<CCValAssign, 16> ArgLocs;
3388 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3389
3390 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3391 for (const CCValAssign &ArgLoc : ArgLocs)
3392 if (!ArgLoc.isRegLoc())
3393 return false;
3394 }
3395
3396 // Check that the call results are passed in the same way.
3397 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3398 CCAssignFnForCall(CalleeCC, isVarArg),
3399 CCAssignFnForCall(CallerCC, isVarArg)))
3400 return false;
3401 // The callee has to preserve all registers the caller needs to preserve.
3402 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3403 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3404 if (!CCMatch) {
3405 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3406 if (Subtarget->hasCustomCallingConv()) {
3407 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3408 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3409 }
3410 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3411 return false;
3412 }
3413
3414 // Nothing more to check if the callee is taking no arguments
3415 if (Outs.empty())
3416 return true;
3417
3418 SmallVector<CCValAssign, 16> ArgLocs;
3419 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3420
3421 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3422
3423 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3424
3425 // If the stack arguments for this call do not fit into our own save area then
3426 // the call cannot be made tail.
3427 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3428 return false;
3429
3430 const MachineRegisterInfo &MRI = MF.getRegInfo();
3431 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3432 return false;
3433
3434 return true;
3435}
3436
3437SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3438 SelectionDAG &DAG,
3439 MachineFrameInfo &MFI,
3440 int ClobberedFI) const {
3441 SmallVector<SDValue, 8> ArgChains;
3442 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3443 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3444
3445 // Include the original chain at the beginning of the list. When this is
3446 // used by target LowerCall hooks, this helps legalize find the
3447 // CALLSEQ_BEGIN node.
3448 ArgChains.push_back(Chain);
3449
3450 // Add a chain value for each stack argument load that overlaps the clobbered one.
3451 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3452 UE = DAG.getEntryNode().getNode()->use_end();
3453 U != UE; ++U)
3454 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3455 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3456 if (FI->getIndex() < 0) {
3457 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3458 int64_t InLastByte = InFirstByte;
3459 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3460
3461 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3462 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3463 ArgChains.push_back(SDValue(L, 1));
3464 }
3465
3466 // Build a tokenfactor for all the chains.
3467 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3468}
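The loop above chains in any stack-argument load whose frame object overlaps the clobbered slot. A standalone sketch of that byte-range overlap test, with made-up offsets and sizes:

    #include <cassert>
    #include <cstdint>

    // Same overlap condition as in addTokenForArgument above.
    static bool rangesOverlap(int64_t FirstByte, int64_t LastByte,
                              int64_t InFirstByte, int64_t InLastByte) {
      return (InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
             (FirstByte <= InFirstByte && InFirstByte <= LastByte);
    }

    int main() {
      // Clobbered slot covers bytes [16, 23]; a load from [20, 27] overlaps it.
      assert(rangesOverlap(16, 23, 20, 27));
      // A load from bytes [32, 39] does not touch the clobbered slot.
      assert(!rangesOverlap(16, 23, 32, 39));
      return 0;
    }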
3469
3470bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3471 bool TailCallOpt) const {
3472 return CallCC == CallingConv::Fast && TailCallOpt;
3473}
3474
3475/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3476/// and add input and output parameter nodes.
3477SDValue
3478AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3479 SmallVectorImpl<SDValue> &InVals) const {
3480 SelectionDAG &DAG = CLI.DAG;
3481 SDLoc &DL = CLI.DL;
3482 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3483 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3484 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3485 SDValue Chain = CLI.Chain;
3486 SDValue Callee = CLI.Callee;
3487 bool &IsTailCall = CLI.IsTailCall;
3488 CallingConv::ID CallConv = CLI.CallConv;
3489 bool IsVarArg = CLI.IsVarArg;
3490
3491 MachineFunction &MF = DAG.getMachineFunction();
3492 bool IsThisReturn = false;
3493
3494 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3495 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3496 bool IsSibCall = false;
3497
3498 if (IsTailCall) {
3499 // Check if it's really possible to do a tail call.
3500 IsTailCall = isEligibleForTailCallOptimization(
3501 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3502 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3503 report_fatal_error("failed to perform tail call elimination on a call "
3504 "site marked musttail");
3505
3506 // A sibling call is one where we're under the usual C ABI and not planning
3507 // to change that but can still do a tail call:
3508 if (!TailCallOpt && IsTailCall)
3509 IsSibCall = true;
3510
3511 if (IsTailCall)
3512 ++NumTailCalls;
3513 }
3514
3515 // Analyze operands of the call, assigning locations to each operand.
3516 SmallVector<CCValAssign, 16> ArgLocs;
3517 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3518 *DAG.getContext());
3519
3520 if (IsVarArg) {
3521 // Handle fixed and variable vector arguments differently.
3522 // Variable vector arguments always go into memory.
3523 unsigned NumArgs = Outs.size();
3524
3525 for (unsigned i = 0; i != NumArgs; ++i) {
3526 MVT ArgVT = Outs[i].VT;
3527 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3528 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3529 /*IsVarArg=*/ !Outs[i].IsFixed);
3530 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3531 assert(!Res && "Call operand has unhandled type");
3532 (void)Res;
3533 }
3534 } else {
3535 // At this point, Outs[].VT may already be promoted to i32. To correctly
3536 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3537 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3538 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3539 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3540 // LocVT.
3541 unsigned NumArgs = Outs.size();
3542 for (unsigned i = 0; i != NumArgs; ++i) {
3543 MVT ValVT = Outs[i].VT;
3544 // Get type of the original argument.
3545 EVT ActualVT = getValueType(DAG.getDataLayout(),
3546 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3547 /*AllowUnknown*/ true);
3548 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3549 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3550 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3551 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3552 ValVT = MVT::i8;
3553 else if (ActualMVT == MVT::i16)
3554 ValVT = MVT::i16;
3555
3556 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3557 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3558 assert(!Res && "Call operand has unhandled type");
3559 (void)Res;
3560 }
3561 }
3562
3563 // Get a count of how many bytes are to be pushed on the stack.
3564 unsigned NumBytes = CCInfo.getNextStackOffset();
3565
3566 if (IsSibCall) {
3567 // Since we're not changing the ABI to make this a tail call, the memory
3568 // operands are already available in the caller's incoming argument space.
3569 NumBytes = 0;
3570 }
3571
3572 // FPDiff is the byte offset of the call's argument area from the callee's.
3573 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3574 // by this amount for a tail call. In a sibling call it must be 0 because the
3575 // caller will deallocate the entire stack and the callee still expects its
3576 // arguments to begin at SP+0. Completely unused for non-tail calls.
3577 int FPDiff = 0;
3578
3579 if (IsTailCall && !IsSibCall) {
3580 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3581
3582 // Since callee will pop argument stack as a tail call, we must keep the
3583 // popped size 16-byte aligned.
3584 NumBytes = alignTo(NumBytes, 16);
3585
3586 // FPDiff will be negative if this tail call requires more space than we
3587 // would automatically have in our incoming argument space. Positive if we
3588 // can actually shrink the stack.
3589 FPDiff = NumReusableBytes - NumBytes;
3590
3591 // The stack pointer must be 16-byte aligned at all times it's used for a
3592 // memory operation, which in practice means at *all* times and in
3593 // particular across call boundaries. Therefore our own arguments started at
3594 // a 16-byte aligned SP and the delta applied for the tail call should
3595 // satisfy the same constraint.
3596 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3597 }
3598
3599 // Adjust the stack pointer for the new arguments...
3600 // These operations are automatically eliminated by the prolog/epilog pass
3601 if (!IsSibCall)
3602 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3603
3604 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3605 getPointerTy(DAG.getDataLayout()));
3606
3607 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3608 SmallVector<SDValue, 8> MemOpChains;
3609 auto PtrVT = getPointerTy(DAG.getDataLayout());
3610
3611 // Walk the register/memloc assignments, inserting copies/loads.
3612 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3613 ++i, ++realArgIdx) {
3614 CCValAssign &VA = ArgLocs[i];
3615 SDValue Arg = OutVals[realArgIdx];
3616 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3617
3618 // Promote the value if needed.
3619 switch (VA.getLocInfo()) {
3620 default:
3621 llvm_unreachable("Unknown loc info!");
3622 case CCValAssign::Full:
3623 break;
3624 case CCValAssign::SExt:
3625 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3626 break;
3627 case CCValAssign::ZExt:
3628 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3629 break;
3630 case CCValAssign::AExt:
3631 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3632 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3633 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3634 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3635 }
3636 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3637 break;
3638 case CCValAssign::BCvt:
3639 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3640 break;
3641 case CCValAssign::FPExt:
3642 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3643 break;
3644 }
3645
3646 if (VA.isRegLoc()) {
3647 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3648 Outs[0].VT == MVT::i64) {
3649 assert(VA.getLocVT() == MVT::i64 &&
3650        "unexpected calling convention register assignment");
3651 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3652        "unexpected use of 'returned'");
3653 IsThisReturn = true;
3654 }
3655 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3656 } else {
3657 assert(VA.isMemLoc());
3658
3659 SDValue DstAddr;
3660 MachinePointerInfo DstInfo;
3661
3662 // FIXME: This works on big-endian for composite byvals, which are the
3663 // common case. It should work for fundamental types too.
3664 uint32_t BEAlign = 0;
3665 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3666 : VA.getValVT().getSizeInBits();
3667 OpSize = (OpSize + 7) / 8;
3668 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3669 !Flags.isInConsecutiveRegs()) {
3670 if (OpSize < 8)
3671 BEAlign = 8 - OpSize;
3672 }
3673 unsigned LocMemOffset = VA.getLocMemOffset();
3674 int32_t Offset = LocMemOffset + BEAlign;
3675 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3676 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3677
3678 if (IsTailCall) {
3679 Offset = Offset + FPDiff;
3680 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3681
3682 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3683 DstInfo =
3684 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3685
3686 // Make sure any stack arguments overlapping with where we're storing
3687 // are loaded before this eventual operation. Otherwise they'll be
3688 // clobbered.
3689 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3690 } else {
3691 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3692
3693 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3694 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3695 LocMemOffset);
3696 }
3697
3698 if (Outs[i].Flags.isByVal()) {
3699 SDValue SizeNode =
3700 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3701 SDValue Cpy = DAG.getMemcpy(
3702 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3703 /*isVol = */ false, /*AlwaysInline = */ false,
3704 /*isTailCall = */ false,
3705 DstInfo, MachinePointerInfo());
3706
3707 MemOpChains.push_back(Cpy);
3708 } else {
3709 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3710 // promoted to a legal register type i32, we should truncate Arg back to
3711 // i1/i8/i16.
3712 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3713 VA.getValVT() == MVT::i16)
3714 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3715
3716 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3717 MemOpChains.push_back(Store);
3718 }
3719 }
3720 }
3721
3722 if (!MemOpChains.empty())
3723 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3724
3725 // Build a sequence of copy-to-reg nodes chained together with token chain
3726 // and flag operands which copy the outgoing args into the appropriate regs.
3727 SDValue InFlag;
3728 for (auto &RegToPass : RegsToPass) {
3729 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3730 RegToPass.second, InFlag);
3731 InFlag = Chain.getValue(1);
3732 }
3733
3734 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3735 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3736 // node so that legalize doesn't hack it.
3737 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3738 auto GV = G->getGlobal();
3739 if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3740 AArch64II::MO_GOT) {
3741 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3742 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3743 } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3744 assert(Subtarget->isTargetWindows() &&
3745 "Windows is the only supported COFF target");
3746 Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3747 } else {
3748 const GlobalValue *GV = G->getGlobal();
3749 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3750 }
3751 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3752 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3753 Subtarget->isTargetMachO()) {
3754 const char *Sym = S->getSymbol();
3755 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3756 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3757 } else {
3758 const char *Sym = S->getSymbol();
3759 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3760 }
3761 }
3762
3763 // We don't usually want to end the call-sequence here because we would tidy
3764 // the frame up *after* the call, however in the ABI-changing tail-call case
3765 // we've carefully laid out the parameters so that when sp is reset they'll be
3766 // in the correct location.
3767 if (IsTailCall && !IsSibCall) {
3768 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3769 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3770 InFlag = Chain.getValue(1);
3771 }
3772
3773 std::vector<SDValue> Ops;
3774 Ops.push_back(Chain);
3775 Ops.push_back(Callee);
3776
3777 if (IsTailCall) {
3778 // Each tail call may have to adjust the stack by a different amount, so
3779 // this information must travel along with the operation for eventual
3780 // consumption by emitEpilogue.
3781 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3782 }
3783
3784 // Add argument registers to the end of the list so that they are known live
3785 // into the call.
3786 for (auto &RegToPass : RegsToPass)
3787 Ops.push_back(DAG.getRegister(RegToPass.first,
3788 RegToPass.second.getValueType()));
3789
3790 // Add a register mask operand representing the call-preserved registers.
3791 const uint32_t *Mask;
3792 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3793 if (IsThisReturn) {
3794 // For 'this' returns, use the X0-preserving mask if applicable
3795 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3796 if (!Mask) {
3797 IsThisReturn = false;
3798 Mask = TRI->getCallPreservedMask(MF, CallConv);
3799 }
3800 } else
3801 Mask = TRI->getCallPreservedMask(MF, CallConv);
3802
3803 if (Subtarget->hasCustomCallingConv())
3804 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
3805
3806 if (TRI->isAnyArgRegReserved(MF))
3807 TRI->emitReservedArgRegCallError(MF);
3808
3809 assert(Mask && "Missing call preserved mask for calling convention");
3810 Ops.push_back(DAG.getRegisterMask(Mask));
3811
3812 if (InFlag.getNode())
3813 Ops.push_back(InFlag);
3814
3815 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3816
3817 // If we're doing a tail call, use a TC_RETURN here rather than an
3818 // actual call instruction.
3819 if (IsTailCall) {
3820 MF.getFrameInfo().setHasTailCall();
3821 return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3822 }
3823
3824 // Returns a chain and a flag for retval copy to use.
3825 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3826 InFlag = Chain.getValue(1);
3827
3828 uint64_t CalleePopBytes =
3829 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3830
3831 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3832 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3833 InFlag, DL);
3834 if (!Ins.empty())
3835 InFlag = Chain.getValue(1);
3836
3837 // Handle result values, copying them out of physregs into vregs that we
3838 // return.
3839 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3840 InVals, IsThisReturn,
3841 IsThisReturn ? OutVals[0] : SDValue());
3842}
3843
3844bool AArch64TargetLowering::CanLowerReturn(
3845 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3846 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3847 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3848 ? RetCC_AArch64_WebKit_JS
3849 : RetCC_AArch64_AAPCS;
3850 SmallVector<CCValAssign, 16> RVLocs;
3851 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3852 return CCInfo.CheckReturn(Outs, RetCC);
3853}
3854
3855SDValue
3856AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3857 bool isVarArg,
3858 const SmallVectorImpl<ISD::OutputArg> &Outs,
3859 const SmallVectorImpl<SDValue> &OutVals,
3860 const SDLoc &DL, SelectionDAG &DAG) const {
3861 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3862 ? RetCC_AArch64_WebKit_JS
3863 : RetCC_AArch64_AAPCS;
3864 SmallVector<CCValAssign, 16> RVLocs;
3865 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3866 *DAG.getContext());
3867 CCInfo.AnalyzeReturn(Outs, RetCC);
3868
3869 // Copy the result values into the output registers.
3870 SDValue Flag;
3871 SmallVector<SDValue, 4> RetOps(1, Chain);
3872 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3873 ++i, ++realRVLocIdx) {
3874 CCValAssign &VA = RVLocs[i];
3875 assert(VA.isRegLoc() && "Can only return in registers!");
3876 SDValue Arg = OutVals[realRVLocIdx];
3877
3878 switch (VA.getLocInfo()) {
3879 default:
3880 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3880)
;
3881 case CCValAssign::Full:
3882 if (Outs[i].ArgVT == MVT::i1) {
3883 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3884 // value. This is strictly redundant on Darwin (which uses "zeroext
3885 // i1"), but will be optimised out before ISel.
3886 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3887 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3888 }
3889 break;
3890 case CCValAssign::BCvt:
3891 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3892 break;
3893 }
3894
3895 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3896 Flag = Chain.getValue(1);
3897 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3898 }
3899 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3900 const MCPhysReg *I =
3901 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3902 if (I) {
3903 for (; *I; ++I) {
3904 if (AArch64::GPR64RegClass.contains(*I))
3905 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3906 else if (AArch64::FPR64RegClass.contains(*I))
3907 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3908 else
3909 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3909)
;
3910 }
3911 }
3912
3913 RetOps[0] = Chain; // Update chain.
3914
3915 // Add the flag if we have it.
3916 if (Flag.getNode())
3917 RetOps.push_back(Flag);
3918
3919 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3920}
3921
3922//===----------------------------------------------------------------------===//
3923// Other Lowering Code
3924//===----------------------------------------------------------------------===//
3925
3926SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3927 SelectionDAG &DAG,
3928 unsigned Flag) const {
3929 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
3930 N->getOffset(), Flag);
3931}
3932
3933SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3934 SelectionDAG &DAG,
3935 unsigned Flag) const {
3936 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3937}
3938
3939SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3940 SelectionDAG &DAG,
3941 unsigned Flag) const {
3942 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3943 N->getOffset(), Flag);
3944}
3945
3946SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
3947 SelectionDAG &DAG,
3948 unsigned Flag) const {
3949 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
3950}
3951
3952// (loadGOT sym)
3953template <class NodeTy>
3954SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
3955 unsigned Flags) const {
3956 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
3957 SDLoc DL(N);
3958 EVT Ty = getPointerTy(DAG.getDataLayout());
3959 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
3960 // FIXME: Once remat is capable of dealing with instructions with register
3961 // operands, expand this into two nodes instead of using a wrapper node.
3962 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
3963}
3964
3965// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
3966template <class NodeTy>
3967SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
3968 unsigned Flags) const {
3969 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
3970 SDLoc DL(N);
3971 EVT Ty = getPointerTy(DAG.getDataLayout());
3972 const unsigned char MO_NC = AArch64II::MO_NC;
3973 return DAG.getNode(
3974 AArch64ISD::WrapperLarge, DL, Ty,
3975 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
3976 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
3977 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
3978 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
3979}
3980
3981// (addlow (adrp %hi(sym)) %lo(sym))
3982template <class NodeTy>
3983SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3984 unsigned Flags) const {
3985 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
3986 SDLoc DL(N);
3987 EVT Ty = getPointerTy(DAG.getDataLayout());
3988 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
3989 SDValue Lo = getTargetNode(N, Ty, DAG,
3990 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
3991 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
3992 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
3993}
3994
3995// (adr sym)
3996template <class NodeTy>
3997SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
3998 unsigned Flags) const {
3999 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4000 SDLoc DL(N);
4001 EVT Ty = getPointerTy(DAG.getDataLayout());
4002 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4003 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4004}
4005
4006SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4007 SelectionDAG &DAG) const {
4008 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4009 const GlobalValue *GV = GN->getGlobal();
4010 unsigned char OpFlags =
4011 Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4012
4013 if (OpFlags != AArch64II::MO_NO_FLAG)
4014 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4015 "unexpected offset in global node");
4016
4017 // This also catches the large code model case for Darwin, and tiny code
4018 // model with got relocations.
4019 if ((OpFlags & AArch64II::MO_GOT) != 0) {
4020 return getGOT(GN, DAG, OpFlags);
4021 }
4022
4023 SDValue Result;
4024 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4025 Result = getAddrLarge(GN, DAG, OpFlags);
4026 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4027 Result = getAddrTiny(GN, DAG, OpFlags);
4028 } else {
4029 Result = getAddr(GN, DAG, OpFlags);
4030 }
4031 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4032 SDLoc DL(GN);
4033 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
4034 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4035 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4036 return Result;
4037}
4038
4039/// Convert a TLS address reference into the correct sequence of loads
4040/// and calls to compute the variable's address (for Darwin, currently) and
4041/// return an SDValue containing the final node.
4042
4043/// Darwin only has one TLS scheme which must be capable of dealing with the
4044/// fully general situation, in the worst case. This means:
4045/// + "extern __thread" declaration.
4046/// + Defined in a possibly unknown dynamic library.
4047///
4048/// The general system is that each __thread variable has a [3 x i64] descriptor
4049/// which contains information used by the runtime to calculate the address. The
4050/// only part of this the compiler needs to know about is the first xword, which
4051/// contains a function pointer that must be called with the address of the
4052/// entire descriptor in "x0".
4053///
4054/// Since this descriptor may be in a different unit, in general even the
4055/// descriptor must be accessed via an indirect load. The "ideal" code sequence
4056/// is:
4057/// adrp x0, _var@TLVPPAGE
4058/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
4059/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
4060/// ; the function pointer
4061/// blr x1 ; Uses descriptor address in x0
4062/// ; Address of _var is now in x0.
4063///
4064/// If the address of _var's descriptor *is* known to the linker, then it can
4065/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4066/// a slight efficiency gain.
4067SDValue
4068AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4069 SelectionDAG &DAG) const {
4070 assert(Subtarget->isTargetDarwin() &&
4071 "This function expects a Darwin target");
4072
4073 SDLoc DL(Op);
4074 MVT PtrVT = getPointerTy(DAG.getDataLayout());
4075 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4076
4077 SDValue TLVPAddr =
4078 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4079 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4080
4081 // The first entry in the descriptor is a function pointer that we must call
4082 // to obtain the address of the variable.
4083 SDValue Chain = DAG.getEntryNode();
4084 SDValue FuncTLVGet = DAG.getLoad(
4085 MVT::i64, DL, Chain, DescAddr,
4086 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4087 /* Alignment = */ 8,
4088 MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
4089 MachineMemOperand::MODereferenceable);
4090 Chain = FuncTLVGet.getValue(1);
4091
4092 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4093 MFI.setAdjustsStack(true);
4094
4095 // TLS calls preserve all registers except those that absolutely must be
4096 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4097 // silly).
4098 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4099 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4100 if (Subtarget->hasCustomCallingConv())
4101 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4102
4103 // Finally, we can make the call. This is just a degenerate version of a
4104 // normal AArch64 call node: x0 takes the address of the descriptor, and
4105 // returns the address of the variable in this thread.
4106 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4107 Chain =
4108 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4109 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4110 DAG.getRegisterMask(Mask), Chain.getValue(1));
4111 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4112}
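The [3 x i64] descriptor relied on above can be pictured roughly as the struct below; this is an illustrative sketch and the field names are invented. The only property the lowering depends on is that the first entry is a function pointer called with the descriptor's own address (in x0) and returning the variable's address.

#include <cstdint>

struct TLVDescriptorSketch {
  void *(*Resolver)(TLVDescriptorSketch *); // called via "blr x1" in the sequence above
  uint64_t Opaque[2];                       // runtime bookkeeping, never read by the compiler
};

// Conceptually, the emitted call sequence computes:
void *addressOfTLSVar(TLVDescriptorSketch *Desc) {
  return Desc->Resolver(Desc); // x0 = Desc on entry, x0 = &var on return
}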
4113
4114/// When accessing thread-local variables under either the general-dynamic or
4115/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4116/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4117/// is a function pointer to carry out the resolution.
4118///
4119/// The sequence is:
4120/// adrp x0, :tlsdesc:var
4121/// ldr x1, [x0, #:tlsdesc_lo12:var]
4122/// add x0, x0, #:tlsdesc_lo12:var
4123/// .tlsdesccall var
4124/// blr x1
4125/// (TPIDR_EL0 offset now in x0)
4126///
4127/// The above sequence must be produced unscheduled, to enable the linker to
4128/// optimize/relax this sequence.
4129/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4130/// above sequence, and expanded really late in the compilation flow, to ensure
4131/// the sequence is produced as per above.
4132SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4133 const SDLoc &DL,
4134 SelectionDAG &DAG) const {
4135 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4136
4137 SDValue Chain = DAG.getEntryNode();
4138 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4139
4140 Chain =
4141 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4142 SDValue Glue = Chain.getValue(1);
4143
4144 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4145}
4146
4147SDValue
4148AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4149 SelectionDAG &DAG) const {
4150 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4151 if (getTargetMachine().getCodeModel() == CodeModel::Large)
4152 report_fatal_error("ELF TLS only supported in small memory model");
4153 // Different choices can be made for the maximum size of the TLS area for a
4154 // module. For the small address model, the default TLS size is 16MiB and the
4155 // maximum TLS size is 4GiB.
4156 // FIXME: add -mtls-size command line option and make it control the 16MiB
4157 // vs. 4GiB code sequence generation.
4158 // FIXME: add tiny codemodel support. We currently generate the same code as
4159 // small, which may be larger than needed.
4160 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4161
4162 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4163
4164 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4165 if (Model == TLSModel::LocalDynamic)
4166 Model = TLSModel::GeneralDynamic;
4167 }
4168
4169 SDValue TPOff;
4170 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4171 SDLoc DL(Op);
4172 const GlobalValue *GV = GA->getGlobal();
4173
4174 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4175
4176 if (Model == TLSModel::LocalExec) {
4177 SDValue HiVar = DAG.getTargetGlobalAddress(
4178 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4179 SDValue LoVar = DAG.getTargetGlobalAddress(
4180 GV, DL, PtrVT, 0,
4181 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4182
4183 SDValue TPWithOff_lo =
4184 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4185 HiVar,
4186 DAG.getTargetConstant(0, DL, MVT::i32)),
4187 0);
4188 SDValue TPWithOff =
4189 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
4190 LoVar,
4191 DAG.getTargetConstant(0, DL, MVT::i32)),
4192 0);
4193 return TPWithOff;
4194 } else if (Model == TLSModel::InitialExec) {
4195 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4196 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4197 } else if (Model == TLSModel::LocalDynamic) {
4198 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
4199 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
4200 // the beginning of the module's TLS region, followed by a DTPREL offset
4201 // calculation.
4202
4203 // These accesses will need deduplicating if there's more than one.
4204 AArch64FunctionInfo *MFI =
4205 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4206 MFI->incNumLocalDynamicTLSAccesses();
4207
4208 // The call needs a relocation too for linker relaxation. It doesn't make
4209 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4210 // the address.
4211 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4212 AArch64II::MO_TLS);
4213
4214 // Now we can calculate the offset from TPIDR_EL0 to this module's
4215 // thread-local area.
4216 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4217
4218 // Now use :dtprel_whatever: operations to calculate this variable's offset
4219 // in its thread-storage area.
4220 SDValue HiVar = DAG.getTargetGlobalAddress(
4221 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4222 SDValue LoVar = DAG.getTargetGlobalAddress(
4223 GV, DL, MVT::i64, 0,
4224 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4225
4226 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4227 DAG.getTargetConstant(0, DL, MVT::i32)),
4228 0);
4229 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4230 DAG.getTargetConstant(0, DL, MVT::i32)),
4231 0);
4232 } else if (Model == TLSModel::GeneralDynamic) {
4233 // The call needs a relocation too for linker relaxation. It doesn't make
4234 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4235 // the address.
4236 SDValue SymAddr =
4237 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4238
4239 // Finally we can make a call to calculate the offset from tpidr_el0.
4240 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4241 } else
4242 llvm_unreachable("Unsupported ELF TLS access model")::llvm::llvm_unreachable_internal("Unsupported ELF TLS access model"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4242)
;
4243
4244 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4245}
4246
4247SDValue
4248AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
4249 SelectionDAG &DAG) const {
4250 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
4251
4252 SDValue Chain = DAG.getEntryNode();
4253 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4254 SDLoc DL(Op);
4255
4256 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
4257
4258 // Load the ThreadLocalStoragePointer from the TEB
4259 // A pointer to the TLS array is located at offset 0x58 from the TEB.
4260 SDValue TLSArray =
4261 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
4262 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
4263 Chain = TLSArray.getValue(1);
4264
4265 // Load the TLS index from the C runtime;
4266 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4267 // This also does the same as LOADgot, but using a generic i32 load,
4268 // while LOADgot only loads i64.
4269 SDValue TLSIndexHi =
4270 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4271 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4272 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4273 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4274 SDValue TLSIndex =
4275 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4276 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4277 Chain = TLSIndex.getValue(1);
4278
4279 // The pointer to the thread's TLS data area is at the TLS Index scaled by 8
4280 // offset into the TLSArray.
4281 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4282 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4283 DAG.getConstant(3, DL, PtrVT));
4284 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4285 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4286 MachinePointerInfo());
4287 Chain = TLS.getValue(1);
4288
4289 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4290 const GlobalValue *GV = GA->getGlobal();
4291 SDValue TGAHi = DAG.getTargetGlobalAddress(
4292 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4293 SDValue TGALo = DAG.getTargetGlobalAddress(
4294 GV, DL, PtrVT, 0,
4295 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4296
4297 // Add the offset from the start of the .tls section (section base).
4298 SDValue Addr =
4299 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4300 DAG.getTargetConstant(0, DL, MVT::i32)),
4301 0);
4302 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4303 return Addr;
4304}
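A scalar sketch, not part of the lowering itself, of the address computation those DAG nodes build: read ThreadLocalStoragePointer at TEB+0x58, index it with _tls_index scaled by 8, then add the variable's offset within the .tls section (passed here as a plain parameter for illustration).

#include <cstdint>

extern "C" uint32_t _tls_index; // provided by the C runtime, as loaded above

char *windowsTLSAddressSketch(char *TEB, uint64_t SecRelOffset) {
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x58); // ThreadLocalStoragePointer
  char *TLSBase = TLSArray[_tls_index];                      // the SHL-by-3 indexing above
  return TLSBase + SecRelOffset;                             // the final ADDXri/ADDlow step
}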
4305
4306SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
4307 SelectionDAG &DAG) const {
4308 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4309 if (DAG.getTarget().useEmulatedTLS())
4310 return LowerToTLSEmulatedModel(GA, DAG);
4311
4312 if (Subtarget->isTargetDarwin())
4313 return LowerDarwinGlobalTLSAddress(Op, DAG);
4314 if (Subtarget->isTargetELF())
4315 return LowerELFGlobalTLSAddress(Op, DAG);
4316 if (Subtarget->isTargetWindows())
4317 return LowerWindowsGlobalTLSAddress(Op, DAG);
4318
4319 llvm_unreachable("Unexpected platform trying to use TLS")::llvm::llvm_unreachable_internal("Unexpected platform trying to use TLS"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4319)
;
4320}
4321
4322SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4323 SDValue Chain = Op.getOperand(0);
4324 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4325 SDValue LHS = Op.getOperand(2);
4326 SDValue RHS = Op.getOperand(3);
4327 SDValue Dest = Op.getOperand(4);
4328 SDLoc dl(Op);
4329
4330 // Handle f128 first, since lowering it will result in comparing the return
4331 // value of a libcall against zero, which is just what the rest of LowerBR_CC
4332 // is expecting to deal with.
4333 if (LHS.getValueType() == MVT::f128) {
4334 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4335
4336 // If softenSetCCOperands returned a scalar, we need to compare the result
4337 // against zero to select between true and false values.
4338 if (!RHS.getNode()) {
4339 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4340 CC = ISD::SETNE;
4341 }
4342 }
4343
4344 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4345 // instruction.
4346 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4347 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4348 // Only lower legal XALUO ops.
4349 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4350 return SDValue();
4351
4352 // The actual operation with overflow check.
4353 AArch64CC::CondCode OFCC;
4354 SDValue Value, Overflow;
4355 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4356
4357 if (CC == ISD::SETNE)
4358 OFCC = getInvertedCondCode(OFCC);
4359 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4360
4361 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4362 Overflow);
4363 }
4364
4365 if (LHS.getValueType().isInteger()) {
4366 assert((LHS.getValueType() == RHS.getValueType()) &&
4367 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4368
4369 // If the RHS of the comparison is zero, we can potentially fold this
4370 // to a specialized branch.
4371 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4372 if (RHSC && RHSC->getZExtValue() == 0) {
4373 if (CC == ISD::SETEQ) {
4374 // See if we can use a TBZ to fold in an AND as well.
4375 // TBZ has a smaller branch displacement than CBZ. If the offset is
4376 // out of bounds, a late MI-layer pass rewrites branches.
4377 // 403.gcc is an example that hits this case.
4378 if (LHS.getOpcode() == ISD::AND &&
4379 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4380 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4381 SDValue Test = LHS.getOperand(0);
4382 uint64_t Mask = LHS.getConstantOperandVal(1);
4383 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4384 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4385 Dest);
4386 }
4387
4388 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4389 } else if (CC == ISD::SETNE) {
4390 // See if we can use a TBZ to fold in an AND as well.
4391 // TBZ has a smaller branch displacement than CBZ. If the offset is
4392 // out of bounds, a late MI-layer pass rewrites branches.
4393 // 403.gcc is an example that hits this case.
4394 if (LHS.getOpcode() == ISD::AND &&
4395 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4396 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4397 SDValue Test = LHS.getOperand(0);
4398 uint64_t Mask = LHS.getConstantOperandVal(1);
4399 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4400 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4401 Dest);
4402 }
4403
4404 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4405 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4406 // Don't combine AND since emitComparison converts the AND to an ANDS
4407 // (a.k.a. TST) and the test in the test bit and branch instruction
4408 // becomes redundant. This would also increase register pressure.
4409 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4410 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4411 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4412 }
4413 }
4414 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4415 LHS.getOpcode() != ISD::AND) {
4416 // Don't combine AND since emitComparison converts the AND to an ANDS
4417 // (a.k.a. TST) and the test in the test bit and branch instruction
4418 // becomes redundant. This would also increase register pressure.
4419 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4420 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4421 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4422 }
4423
4424 SDValue CCVal;
4425 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4426 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4427 Cmp);
4428 }
4429
4430 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4431 LHS.getValueType() == MVT::f64);
4432
4433 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4434 // clean. Some of them require two branches to implement.
4435 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4436 AArch64CC::CondCode CC1, CC2;
4437 changeFPCCToAArch64CC(CC, CC1, CC2);
4438 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4439 SDValue BR1 =
4440 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4441 if (CC2 != AArch64CC::AL) {
4442 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4443 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4444 Cmp);
4445 }
4446
4447 return BR1;
4448}
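The AND-with-power-of-two fold above matches branches that test a single bit; a small sketch of the source shape it targets, with the expected instruction noted as an assumption rather than a guarantee.

// Branch on one bit: (Flags & (1ULL << 3)) == 0 is the pattern folded to a
// single TBZ (bit 3) above; the SETNE form folds to TBNZ instead.
void dispatchOnBit3(unsigned long long Flags, void (*OnClear)(), void (*OnSet)()) {
  if ((Flags & 0x8ULL) == 0)
    OnClear(); // expected: tbz xFlags, #3, <OnClear>
  else
    OnSet();
}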
4449
4450SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4451 SelectionDAG &DAG) const {
4452 EVT VT = Op.getValueType();
4453 SDLoc DL(Op);
4454
4455 SDValue In1 = Op.getOperand(0);
4456 SDValue In2 = Op.getOperand(1);
4457 EVT SrcVT = In2.getValueType();
4458
4459 if (SrcVT.bitsLT(VT))
4460 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4461 else if (SrcVT.bitsGT(VT))
4462 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4463
4464 EVT VecVT;
4465 uint64_t EltMask;
4466 SDValue VecVal1, VecVal2;
4467
4468 auto setVecVal = [&] (int Idx) {
4469 if (!VT.isVector()) {
4470 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4471 DAG.getUNDEF(VecVT), In1);
4472 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4473 DAG.getUNDEF(VecVT), In2);
4474 } else {
4475 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4476 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4477 }
4478 };
4479
4480 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4481 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4482 EltMask = 0x80000000ULL;
4483 setVecVal(AArch64::ssub);
4484 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4485 VecVT = MVT::v2i64;
4486
4487 // We want to materialize a mask with the high bit set, but the AdvSIMD
4488 // immediate moves cannot materialize that in a single instruction for
4489 // 64-bit elements. Instead, materialize zero and then negate it.
4490 EltMask = 0;
4491
4492 setVecVal(AArch64::dsub);
4493 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4494 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4495 EltMask = 0x8000ULL;
4496 setVecVal(AArch64::hsub);
4497 } else {
4498 llvm_unreachable("Invalid type for copysign!")::llvm::llvm_unreachable_internal("Invalid type for copysign!"
, "/build/llvm-toolchain-snapshot-8~svn345461/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4498)
;
4499 }
4500
4501 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4502
4503 // If we couldn't materialize the mask above, then the mask vector will be
4504 // the zero vector, and we need to negate it here.
4505 if (VT == MVT::f64 || VT == MVT::v2f64) {
4506 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4507 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4508 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4509 }
4510
4511 SDValue Sel =
4512 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4513
4514 if (VT == MVT::f16)
4515 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4516 if (VT == MVT::f32)
4517 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4518 else if (VT == MVT::f64)
4519 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4520 else
4521 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4522}
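A bit-level sketch of why materialize-zero-then-FNEG yields the f64 sign mask: negating +0.0 flips only the sign bit, so each 64-bit lane ends up holding 0x8000000000000000, exactly the mask the BIT instruction needs.

#include <cstdint>
#include <cstring>

uint64_t f64SignMaskViaNegatedZero() {
  double NegZero = -0.0;                       // corresponds to FNEG of the zero vector
  uint64_t Bits;
  std::memcpy(&Bits, &NegZero, sizeof(Bits));
  return Bits;                                 // 0x8000000000000000
}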
4523
4524SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4525 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
4526 Attribute::NoImplicitFloat))
4527 return SDValue();
4528
4529 if (!Subtarget->hasNEON())
4530 return SDValue();
4531
4532 // While there is no integer popcount instruction, it can
4533 // be more efficiently lowered to the following sequence that uses
4534 // AdvSIMD registers/instructions as long as the copies to/from
4535 // the AdvSIMD registers are cheap.
4536 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4537 // CNT V0.8B, V0.8B // 8xbyte pop-counts
4538 // ADDV B0, V0.8B // sum 8xbyte pop-counts
4539 // UMOV X0, V0.B[0] // copy byte result back to integer reg
4540 SDValue Val = Op.getOperand(0);
4541 SDLoc DL(Op);
4542 EVT VT = Op.getValueType();
4543
4544 if (VT == MVT::i32 || VT == MVT::i64) {
4545 if (VT == MVT::i32)
4546 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4547 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4548
4549 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4550 SDValue UaddLV = DAG.getNode(
4551 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4552 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4553
4554 if (VT == MVT::i64)
4555 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4556 return UaddLV;
4557 }
4558
4559 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
4560 VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
4561 "Unexpected type for custom ctpop lowering");
4562
4563 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
4564 Val = DAG.getBitcast(VT8Bit, Val);
4565 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
4566
4567 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
4568 unsigned EltSize = 8;
4569 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
4570 while (EltSize != VT.getScalarSizeInBits()) {
4571 EltSize *= 2;
4572 NumElts /= 2;
4573 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
4574 Val = DAG.getNode(
4575 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
4576 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
4577 }
4578
4579 return Val;
4580}
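A scalar model, for illustration only, of what the vector sequence computes: popcount each byte lane (CNT), then add the lanes horizontally (ADDV, or the repeated UADDLP widening above).

#include <cstdint>

unsigned popcount64ByteWise(uint64_t X) {
  unsigned Sum = 0;
  for (int Lane = 0; Lane < 8; ++Lane) {
    uint8_t Byte = (X >> (8 * Lane)) & 0xff; // one v8i8 lane
    while (Byte) {                           // per-lane CNT
      Sum += Byte & 1;
      Byte >>= 1;
    }
  }
  return Sum;                                // ADDV: horizontal add of the lane counts
}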
4581
4582SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4583
4584 if (Op.getValueType().isVector())
4585 return LowerVSETCC(Op, DAG);
4586
4587 SDValue LHS = Op.getOperand(0);
4588 SDValue RHS = Op.getOperand(1);
4589 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4590 SDLoc dl(Op);
4591
4592 // We chose ZeroOrOneBooleanContents, so use zero and one.
4593 EVT VT = Op.getValueType();
4594 SDValue TVal = DAG.getConstant(1, dl, VT);
4595 SDValue FVal = DAG.getConstant(0, dl, VT);
4596
4597 // Handle f128 first, since one possible outcome is a normal integer
4598 // comparison which gets picked up by the next if statement.
4599 if (LHS.getValueType() == MVT::f128) {
4600 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4601
4602 // If softenSetCCOperands returned a scalar, use it.
4603 if (!RHS.getNode()) {
4604 assert(LHS.getValueType() == Op.getValueType() &&
4605 "Unexpected setcc expansion!");
4606 return LHS;
4607 }
4608 }
4609
4610 if (LHS.getValueType().isInteger()) {
4611 SDValue CCVal;
4612 SDValue Cmp =
4613 getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4614
4615 // Note that we inverted the condition above, so we reverse the order of
4616 // the true and false operands here. This will allow the setcc to be
4617 // matched to a single CSINC instruction.
4618 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4619 }
4620
4621 // Now we know we're dealing with FP values.
4622 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4623 LHS.getValueType() == MVT::f64);
4624
4625 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4626 // and do the comparison.
4627 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4628
4629 AArch64CC::CondCode CC1, CC2;
4630 changeFPCCToAArch64CC(CC, CC1, CC2);
4631 if (CC2 == AArch64CC::AL) {
4632 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4633 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4634
4635 // Note that we inverted the condition above, so we reverse the order of
4636 // the true and false operands here. This will allow the setcc to be
4637 // matched to a single CSINC instruction.
4638 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4639 } else {
4640 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4641 // totally clean. Some of them require two CSELs to implement. As is in
4642 // this case, we emit the first CSEL and then emit a second using the output
4643 // of the first as the RHS. We're effectively OR'ing the two CC's together.
4644
4645 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4646 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4647 SDValue CS1 =
4648 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4649
4650 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4651 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4652 }
4653}
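What the inverted-condition trick buys, sketched at the source level: an integer setcc becomes one compare plus one CSINC against wzr (the CSET alias). The instructions in the comment are the expected lowering, an assumption for illustration rather than something the code above asserts.

int isEqual(long A, long B) {
  return A == B; // expected: cmp x0, x1 ; cset w0, eq
                 // (cset is csinc w0, wzr, wzr, ne - condition inverted, arms swapped)
}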
4654
4655SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4656 SDValue RHS, SDValue TVal,
4657 SDValue FVal, const SDLoc &dl,
4658 SelectionDAG &DAG) const {
4659 // Handle f128 first, because it will result in a comparison of some RTLIB
4660 // call result against zero.
4661 if (LHS.getValueType() == MVT::f128) {
4662 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4663
4664 // If softenSetCCOperands returned a scalar, we need to compare the result
4665 // against zero to select between true and false values.
4666 if (!RHS.getNode()) {
4667 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4668 CC = ISD::SETNE;
4669 }
4670 }
4671
4672 // Also handle f16, for which we need to do a f32 comparison.
4673 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4674 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4675 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4676 }
4677
4678 // Next, handle integers.
4679 if (LHS.getValueType().isInteger()) {
4680 assert((LHS.getValueType() == RHS.getValueType()) &&
4681 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4682
4683 unsigned Opcode = AArch64ISD::CSEL;
4684
4685 // If both the TVal and the FVal are constants, see if we can swap them in
4686 // order to form a CSINV or CSINC out of them.
4687 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4688 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4689
4690 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4691 std::swap(TVal, FVal);
4692 std::swap(CTVal, CFVal);
4693 CC = ISD::getSetCCInverse(CC, true);
4694 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4695 std::swap(TVal, FVal);
4696 std::swap(CTVal, CFVal);
4697 CC = ISD::getSetCCInverse(CC, true);
4698 } else if (TVal.getOpcode() == ISD::XOR) {
4699 // If TVal is a NOT we want to swap TVal and FVal so that we can match
4700 // with a CSINV rather than a CSEL.
4701 if (isAllOnesConstant(TVal.getOperand(1))) {
4702 std::swap(TVal, FVal);
4703 std::swap(CTVal, CFVal);
4704 CC = ISD::getSetCCInverse(CC, true);
4705 }
4706 } else if (TVal.getOpcode() == ISD::SUB) {
4707 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4708 // that we can match with a CSNEG rather than a CSEL.
4709 if (isNullConstant(TVal.getOperand(0))) {
4710 std::swap(TVal, FVal);
4711 std::swap(CTVal, CFVal);
4712 CC = ISD::getSetCCInverse(CC, true);
4713 }
4714 } else if (CTVal && CFVal) {
4715 const int64_t TrueVal = CTVal->getSExtValue();
4716 const int64_t FalseVal = CFVal->getSExtValue();
4717 bool Swap = false;
4718
4719 // If both TVal and FVal are constants, see if FVal is the
4720 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4721 // instead of a CSEL in that case.
4722 if (TrueVal == ~FalseVal) {
4723 Opcode = AArch64ISD::CSINV;
4724 } else if (TrueVal == -FalseVal) {
4725 Opcode = AArch64ISD::CSNEG;
4726 } else if (TVal.getValueType() == MVT::i32) {
4727 // If our operands are only 32-bit wide, make sure we use 32-bit
4728 // arithmetic for the check whether we can use CSINC. This ensures that
4729 // the addition in the check will wrap around properly in case there is
4730 // an overflow (which would not be the case if we do the check with
4731 // 64-bit arithmetic).
4732 const uint32_t TrueVal32 = CTVal->getZExtValue();
4733 const uint32_t FalseVal32 = CFVal->getZExtValue();
4734
4735 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4736 Opcode = AArch64ISD::CSINC;
4737
4738 if (TrueVal32 > FalseVal32) {
4739 Swap = true;
4740 }
4741 }
4742 // 64-bit check whether we can use CSINC.
4743 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4744 Opcode = AArch64ISD::CSINC;
4745
4746 if (TrueVal > FalseVal) {
4747 Swap = true;
4748 }
4749 }
4750
4751 // Swap TVal and FVal if necessary.
4752 if (Swap) {
4753 std::swap(TVal, FVal);
4754 std::swap(CTVal, CFVal);
4755 CC = ISD::getSetCCInverse(CC, true);
4756 }
4757
4758 if (Opcode != AArch64ISD::CSEL) {
4759 // Drop FVal since we can get its value by simply inverting/negating
4760 // TVal.
4761 FVal = TVal;
4762 }
4763 }
4764
4765 // Avoid materializing a constant when possible by reusing a known value in
4766 // a register. However, don't perform this optimization if the known value
4767 // is one, zero or negative one in the case of a CSEL. We can always
4768 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4769 // FVal, respectively.
4770 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4771 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4772 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4773 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4774 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4775 // "a != C ? x : a" to avoid materializing C.
4776 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4777 TVal = LHS;
4778 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4779 FVal = LHS;
4780 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4781 assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
4782 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4783 // avoid materializing C.
4784 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4785 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4786 Opcode = AArch64ISD::CSINV;
4787 TVal = LHS;
4788 FVal = DAG.getConstant(0, dl, FVal.getValueType());
4789 }
4790 }
4791
4792 SDValue CCVal;
4793 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4794 EVT VT = TVal.getValueType();
4795 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4796 }
4797
4798 // Now we know we're dealing with FP values.
4799 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4800 LHS.getValueType() == MVT::f64);
4801 assert(LHS.getValueType() == RHS.getValueType());
4802 EVT VT = TVal.getValueType();
4803 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4804
4805 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4806 // clean. Some of them require two CSELs to implement.
4807 AArch64CC::CondCode CC1, CC2;
4808 changeFPCCToAArch64CC(CC, CC1, CC2);
4809
4810 if (DAG.getTarget().Options.UnsafeFPMath) {
4811 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4812 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4813 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4814 if (RHSVal && RHSVal->isZero()) {
4815 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4816 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4817
4818 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4819 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4820 TVal = LHS;
4821 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4822 CFVal && CFVal->isZero() &&
4823 FVal.getValueType() == LHS.getValueType())
4824 FVal = LHS;
4825 }
4826 }
4827
4828 // Emit first, and possibly only, CSEL.
4829 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4830 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4831
4832 // If we need a second CSEL, emit it, using the output of the first as the
4833 // RHS. We're effectively OR'ing the two CC's together.
4834 if (CC2 != AArch64CC::AL) {
4835 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4836 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4837 }
4838
4839 // Otherwise, return the output of the first CSEL.
4840 return CS1;
4841}
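// A minimal sketch of what the two-CSEL path above selects (illustrative
// only; register names are placeholders, not taken from this file):
//
//   fcmp  d0, d1              ; Cmp
//   fcsel d2, dT, dF, CC1     ; CS1 = CC1 ? TVal : FVal
//   fcsel d2, dT, d2, CC2     ; result = CC2 ? TVal : CS1
//
// so the result is TVal whenever either CC1 or CC2 holds, and FVal otherwise,
// which is exactly the OR of the two condition codes described above.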
4842
4843SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4844 SelectionDAG &DAG) const {
4845 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4846 SDValue LHS = Op.getOperand(0);
4847 SDValue RHS = Op.getOperand(1);
4848 SDValue TVal = Op.getOperand(2);
4849 SDValue FVal = Op.getOperand(3);
4850 SDLoc DL(Op);
4851 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4852}
4853
4854SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4855 SelectionDAG &DAG) const {
4856 SDValue CCVal = Op->getOperand(0);
4857 SDValue TVal = Op->getOperand(1);
4858 SDValue FVal = Op->getOperand(2);
4859 SDLoc DL(Op);
4860
4861 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4862 // instruction.
4863 if (isOverflowIntrOpRes(CCVal)) {
4864 // Only lower legal XALUO ops.
4865 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4866 return SDValue();
4867
4868 AArch64CC::CondCode OFCC;
4869 SDValue Value, Overflow;
4870 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4871 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4872
4873 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4874 CCVal, Overflow);
4875 }
4876
4877 // Lower it the same way as we would lower a SELECT_CC node.
4878 ISD::CondCode CC;
4879 SDValue LHS, RHS;
4880 if (CCVal.getOpcode() == ISD::SETCC) {
4881 LHS = CCVal.getOperand(0);
4882 RHS = CCVal.getOperand(1);
4883 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4884 } else {
4885 LHS = CCVal;
4886 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4887 CC = ISD::SETNE;
4888 }
4889 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4890}
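// A minimal sketch of the fallback path above (illustrative only): when the
// select condition is not itself a SETCC, it is treated as a compare against
// zero, i.e.
//
//   select(%c, %t, %f)  ==>  select_cc(%c, 0, %t, %f, setne)
//
// so the value %t is chosen whenever %c is non-zero.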
4891
4892SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4893 SelectionDAG &DAG) const {
4894  // Jump table entries are PC-relative offsets. No additional tweaking
4895 // is necessary here. Just get the address of the jump table.
4896 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4897
4898 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4899 !Subtarget->isTargetMachO()) {
4900 return getAddrLarge(JT, DAG);
4901 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4902 return getAddrTiny(JT, DAG);
4903 }
4904 return getAddr(JT, DAG);
4905}
4906
4907SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
4908 SelectionDAG &DAG) const {
4909  // Jump table entries are PC-relative offsets. No additional tweaking
4910 // is necessary here. Just get the address of the jump table.
4911 SDLoc DL(Op);
4912 SDValue JT = Op.getOperand(1);
4913 SDValue Entry = Op.getOperand(2);
4914 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
4915
4916 SDNode *Dest =
4917 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
4918 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
4919 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
4920 SDValue(Dest, 0));
4921}
4922
4923SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4924 SelectionDAG &DAG) const {
4925 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4926
4927 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4928 // Use the GOT for the large code model on iOS.
4929 if (Subtarget->isTargetMachO()) {
4930 return getGOT(CP, DAG);
4931 }
4932 return getAddrLarge(CP, DAG);
4933 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4934 return getAddrTiny(CP, DAG);
4935 } else {
4936 return getAddr(CP, DAG);
4937 }
4938}
4939
4940SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4941 SelectionDAG &DAG) const {
4942 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
4943 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4944 !Subtarget->isTargetMachO()) {
4945 return getAddrLarge(BA, DAG);
4946 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4947 return getAddrTiny(BA, DAG);
4948 }
4949 return getAddr(BA, DAG);
4950}
4951
4952SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4953 SelectionDAG &DAG) const {
4954 AArch64FunctionInfo *FuncInfo =
4955 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4956
4957 SDLoc DL(Op);
4958 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4959 getPointerTy(DAG.getDataLayout()));
4960 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4961 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4962 MachinePointerInfo(SV));
4963}
4964
4965SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
4966 SelectionDAG &DAG) const {
4967 AArch64FunctionInfo *FuncInfo =
4968 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4969
4970 SDLoc DL(Op);
4971 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
4972 ? FuncInfo->getVarArgsGPRIndex()
4973 : FuncInfo->getVarArgsStackIndex(),
4974 getPointerTy(DAG.getDataLayout()));
4975 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4976 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4977 MachinePointerInfo(SV));
4978}
4979
4980SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
4981 SelectionDAG &DAG) const {
4982 // The layout of the va_list struct is specified in the AArch64 Procedure Call
4983 // Standard, section B.3.
4984 MachineFunction &MF = DAG.getMachineFunction();
4985 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4986 auto PtrVT = getPointerTy(DAG.getDataLayout());
4987 SDLoc DL(Op);
4988
4989 SDValue Chain = Op.getOperand(0);
4990 SDValue VAList = Op.getOperand(1);
4991 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4992 SmallVector<SDValue, 4> MemOps;
4993
4994 // void *__stack at offset 0
4995 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
4996 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
4997 MachinePointerInfo(SV), /* Alignment = */ 8));
4998
4999 // void *__gr_top at offset 8
5000 int GPRSize = FuncInfo->getVarArgsGPRSize();
5001 if (GPRSize > 0) {
5002 SDValue GRTop, GRTopAddr;
5003
5004 GRTopAddr =
5005 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
5006
5007 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
5008 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
5009 DAG.getConstant(GPRSize, DL, PtrVT));
5010
5011 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
5012 MachinePointerInfo(SV, 8),
5013 /* Alignment = */ 8));
5014 }
5015
5016 // void *__vr_top at offset 16
5017 int FPRSize = FuncInfo->getVarArgsFPRSize();
5018 if (FPRSize > 0) {
5019 SDValue VRTop, VRTopAddr;
5020 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5021 DAG.getConstant(16, DL, PtrVT));
5022
5023 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
5024 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
5025 DAG.getConstant(FPRSize, DL, PtrVT));
5026
5027 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
5028 MachinePointerInfo(SV, 16),
5029 /* Alignment = */ 8));
5030 }
5031
5032 // int __gr_offs at offset 24
5033 SDValue GROffsAddr =
5034 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
5035 MemOps.push_back(DAG.getStore(
5036 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
5037 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
5038
5039 // int __vr_offs at offset 28
5040 SDValue VROffsAddr =
5041 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
5042 MemOps.push_back(DAG.getStore(
5043 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
5044 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
5045
5046 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5047}
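// For reference, the stores above fill in the AAPCS64 va_list structure; a
// C sketch of that layout (field names follow PCS section B.3):
//
//   struct va_list {
//     void *__stack;   // offset 0:  next stacked argument
//     void *__gr_top;  // offset 8:  end of the GP register save area
//     void *__vr_top;  // offset 16: end of the FP/SIMD register save area
//     int   __gr_offs; // offset 24: -(bytes of GP save area still unused)
//     int   __vr_offs; // offset 28: -(bytes of FP save area still unused)
//   };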
5048
5049SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
5050 SelectionDAG &DAG) const {
5051 MachineFunction &MF = DAG.getMachineFunction();
5052
5053 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
5054 return LowerWin64_VASTART(Op, DAG);
5055 else if (Subtarget->isTargetDarwin())
5056 return LowerDarwin_VASTART(Op, DAG);
5057 else
5058 return LowerAAPCS_VASTART(Op, DAG);
5059}
5060
5061SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
5062 SelectionDAG &DAG) const {
5063  // AAPCS has three pointers and two ints (= 32 bytes); Darwin has a single
5064 // pointer.
5065 SDLoc DL(Op);
5066 unsigned VaListSize =
5067 Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
5068 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
5069 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
5070
5071 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
5072 Op.getOperand(2),
5073 DAG.getConstant(VaListSize, DL, MVT::i32),
5074 8, false, false, false, MachinePointerInfo(DestSV),
5075 MachinePointerInfo(SrcSV));
5076}
5077
5078SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
5079  assert(Subtarget->isTargetDarwin() &&
5080         "automatic va_arg instruction only works on Darwin");
5081
5082 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5083 EVT VT = Op.getValueType();
5084 SDLoc DL(Op);
5085 SDValue Chain = Op.getOperand(0);
5086 SDValue Addr = Op.getOperand(1);
5087 unsigned Align = Op.getConstantOperandVal(3);
5088 auto PtrVT = getPointerTy(DAG.getDataLayout());
5089
5090 SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
5091 Chain = VAList.getValue(1);
5092
5093 if (Align > 8) {
5094    assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
5095 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5096 DAG.getConstant(Align - 1, DL, PtrVT));
5097 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
5098 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
5099 }
5100
5101 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5102 uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
5103
5104 // Scalar integer and FP values smaller than 64 bits are implicitly extended
5105 // up to 64 bits. At the very least, we have to increase the striding of the
5106 // vaargs list to match this, and for FP values we need to introduce
5107 // FP_ROUND nodes as well.
5108 if (VT.isInteger() && !VT.isVector())
5109 ArgSize = 8;
5110 bool NeedFPTrunc = false;
5111 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
5112 ArgSize = 8;
5113 NeedFPTrunc = true;
5114 }
5115
5116 // Increment the pointer, VAList, to the next vaarg
5117 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5118 DAG.getConstant(ArgSize, DL, PtrVT));
5119 // Store the incremented VAList to the legalized pointer
5120 SDValue APStore =
5121 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
5122
5123 // Load the actual argument out of the pointer VAList
5124 if (NeedFPTrunc) {
5125 // Load the value as an f64.
5126 SDValue WideFP =
5127 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
5128 // Round the value down to an f32.
5129 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
5130 DAG.getIntPtrConstant(1, DL));
5131 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
5132 // Merge the rounded value with the chain output of the load.
5133 return DAG.getMergeValues(Ops, DL);
5134 }
5135
5136 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
5137}
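// A worked example of the over-alignment rounding above (illustrative only):
// with Align == 16 and VAList == 0x1008,
//
//   VAList = (0x1008 + (16 - 1)) & -(int64_t)16
//          = 0x1017 & ~0xF = 0x1010
//
// i.e. the cursor is bumped to the next 16-byte boundary before the argument
// is loaded.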
5138
5139SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
5140 SelectionDAG &DAG) const {
5141 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5142 MFI.setFrameAddressIsTaken(true);
5143
5144 EVT VT = Op.getValueType();
5145 SDLoc DL(Op);
5146 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5147 SDValue FrameAddr =
5148 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5149 while (Depth--)
5150 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
5151 MachinePointerInfo());
5152 return FrameAddr;
5153}
5154
5155// FIXME? Maybe this could be a TableGen attribute on some registers and
5156// this table could be generated automatically from RegInfo.
5157unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
5158 SelectionDAG &DAG) const {
5159 unsigned Reg = StringSwitch<unsigned>(RegName)
5160 .Case("sp", AArch64::SP)
5161 .Case("x1", AArch64::X1)
5162 .Case("w1", AArch64::W1)
5163 .Case("x2", AArch64::X2)
5164 .Case("w2", AArch64::W2)
5165 .Case("x3", AArch64::X3)
5166 .Case("w3", AArch64::W3)
5167 .Case("x4", AArch64::X4)
5168 .Case("w4", AArch64::W4)
5169 .Case("x5", AArch64::X5)
5170 .Case("w5", AArch64::W5)
5171 .Case("x6", AArch64::X6)
5172 .Case("w6", AArch64::W6)
5173 .Case("x7", AArch64::X7)
5174 .Case("w7", AArch64::W7)
5175 .Case("x18", AArch64::X18)
5176 .Case("w18", AArch64::W18)
5177 .Case("x20", AArch64::X20)
5178 .Case("w20", AArch64::W20)
5179 .Default(0);
5180 if (((Reg == AArch64::X1 || Reg == AArch64::W1) &&
5181 !Subtarget->isXRegisterReserved(1)) ||
5182 ((Reg == AArch64::X2 || Reg == AArch64::W2) &&
5183 !Subtarget->isXRegisterReserved(2)) ||
5184 ((Reg == AArch64::X3 || Reg == AArch64::W3) &&
5185 !Subtarget->isXRegisterReserved(3)) ||
5186 ((Reg == AArch64::X4 || Reg == AArch64::W4) &&
5187 !Subtarget->isXRegisterReserved(4)) ||
5188 ((Reg == AArch64::X5 || Reg == AArch64::W5) &&
5189 !Subtarget->isXRegisterReserved(5)) ||
5190 ((Reg == AArch64::X6 || Reg == AArch64::W6) &&
5191 !Subtarget->isXRegisterReserved(6)) ||
5192 ((Reg == AArch64::X7 || Reg == AArch64::W7) &&
5193 !Subtarget->isXRegisterReserved(7)) ||
5194 ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
5195 !Subtarget->isXRegisterReserved(18)) ||
5196 ((Reg == AArch64::X20 || Reg == AArch64::W20) &&
5197 !Subtarget->isXRegisterReserved(20)))
5198 Reg = 0;
5199 if (Reg)
5200 return Reg;
5201 report_fatal_error(Twine("Invalid register name \""
5202 + StringRef(RegName) + "\"."));
5203}
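// Usage sketch (an assumption about the typical caller, not taken from this
// file): GCC-style global named register variables in Clang lower to the
// llvm.read_register / llvm.write_register intrinsics, which end up querying
// this hook, e.g.
//
//   register unsigned long current_sp __asm__("sp");
//
// "sp" is always accepted; x1-x7, x18 and x20 (and their w aliases) are only
// accepted when the corresponding register has been reserved, otherwise the
// request falls through to report_fatal_error above.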
5204
5205SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
5206 SelectionDAG &DAG) const {
5207 MachineFunction &MF = DAG.getMachineFunction();
5208 MachineFrameInfo &MFI = MF.getFrameInfo();
5209 MFI.setReturnAddressIsTaken(true);
5210
5211 EVT VT = Op.getValueType();
5212 SDLoc DL(Op);
5213 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5214 if (Depth) {
5215 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5216 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5217 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
5218 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
5219 MachinePointerInfo());
5220 }
5221
5222 // Return LR, which contains the return address. Mark it an implicit live-in.
5223 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
5224 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
5225}
5226
5227/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
5228/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
5229SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
5230 SelectionDAG &DAG) const {
5231  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5232 EVT VT = Op.getValueType();
5233 unsigned VTBits = VT.getSizeInBits();
5234 SDLoc dl(Op);
5235 SDValue ShOpLo = Op.getOperand(0);
5236 SDValue ShOpHi = Op.getOperand(1);
5237 SDValue ShAmt = Op.getOperand(2);
5238 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5239
5240  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5241
5242 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5243 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5244 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5245
5246 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
5247 // is "undef". We wanted 0, so CSEL it directly.
5248 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5249 ISD::SETEQ, dl, DAG);
5250 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5251 HiBitsForLo =
5252 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5253 HiBitsForLo, CCVal, Cmp);
5254
5255 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5256 DAG.getConstant(VTBits, dl, MVT::i64));
5257
5258 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5259 SDValue LoForNormalShift =
5260 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
5261
5262 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5263 dl, DAG);
5264 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5265 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5266 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5267 LoForNormalShift, CCVal, Cmp);
5268
5269 // AArch64 shifts larger than the register width are wrapped rather than
5270 // clamped, so we can't just emit "hi >> x".
5271 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5272 SDValue HiForBigShift =
5273 Opc == ISD::SRA
5274 ? DAG.getNode(Opc, dl, VT, ShOpHi,
5275 DAG.getConstant(VTBits - 1, dl, MVT::i64))
5276 : DAG.getConstant(0, dl, VT);
5277 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5278 HiForNormalShift, CCVal, Cmp);
5279
5280 SDValue Ops[2] = { Lo, Hi };
5281 return DAG.getMergeValues(Ops, dl);
5282}
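// A scalar sketch of the 128-bit right shift constructed above (illustrative
// only; the DAG uses CSELs where this sketch uses ?:):
//
//   hiForLo = (amt == 0) ? 0 : hi << (64 - amt);       // CSEL avoids SHL by 64
//   loRes   = (amt >= 64) ? shr(hi, amt - 64)          // "big" shift case
//                         : (lo >> amt) | hiForLo;     // normal case
//   hiRes   = (amt >= 64) ? (isSRA ? shr(hi, 63) : 0)  // sign- or zero-fill
//                         : shr(hi, amt);
//
// where shr() is an arithmetic or logical right shift for SRA_PARTS or
// SRL_PARTS respectively. LowerShiftLeftParts below is the mirror image.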
5283
5284/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5285/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
5286SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
5287 SelectionDAG &DAG) const {
5288  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5289 EVT VT = Op.getValueType();
5290 unsigned VTBits = VT.getSizeInBits();
5291 SDLoc dl(Op);
5292 SDValue ShOpLo = Op.getOperand(0);
5293 SDValue ShOpHi = Op.getOperand(1);
5294 SDValue ShAmt = Op.getOperand(2);
5295
5296  assert(Op.getOpcode() == ISD::SHL_PARTS);
5297 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5298 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5299 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5300
5301 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
5302 // is "undef". We wanted 0, so CSEL it directly.
5303 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5304 ISD::SETEQ, dl, DAG);
5305 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5306 LoBitsForHi =
5307 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5308 LoBitsForHi, CCVal, Cmp);
5309
5310 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5311 DAG.getConstant(VTBits, dl, MVT::i64));
5312 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5313 SDValue HiForNormalShift =
5314 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
5315
5316 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5317
5318 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5319 dl, DAG);
5320 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5321 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5322 HiForNormalShift, CCVal, Cmp);
5323
5324  // AArch64 shifts larger than the register width are wrapped rather than
5325 // clamped, so we can't just emit "lo << a" if a is too big.
5326 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
5327 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5328 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5329 LoForNormalShift, CCVal, Cmp);
5330
5331 SDValue Ops[2] = { Lo, Hi };
5332 return DAG.getMergeValues(Ops, dl);
5333}
5334
5335bool AArch64TargetLowering::isOffsetFoldingLegal(
5336 const GlobalAddressSDNode *GA) const {
5337 // Offsets are folded in the DAG combine rather than here so that we can
5338 // intelligently choose an offset based on the uses.
5339 return false;
5340}
5341
5342bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5343 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
5344 // FIXME: We should be able to handle f128 as well with a clever lowering.
5345 if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
5346 (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
5347    LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n");
5348 return true;
5349 }
5350
5351 bool IsLegal = false;
5352 SmallString<128> ImmStrVal;
5353 Imm.toString(ImmStrVal);
5354
5355 if (VT == MVT::f64)
5356 IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
5357 else if (VT == MVT::f32)
5358 IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
5359 else if (VT == MVT::f16 && Subtarget->hasFullFP16())
5360 IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
5361
5362 if (IsLegal) {
5363    LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString()
5364                      << " imm value: " << ImmStrVal << "\n");
5365 return true;
5366 }
5367
5368  LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString()
5369                    << " imm value: " << ImmStrVal << "\n");
5370 return false;
5371}
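// For illustration (not an exhaustive list): immediates such as 1.0, 0.5,
// 2.0 or 31.0 fit the 8-bit FMOV encoding and are reported legal here, while
// +0.0 is not FMOV-encodable and is instead handled by the
// "fmov Rd, xzr/wzr" special case at the top of this function.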
5372
5373//===----------------------------------------------------------------------===//
5374// AArch64 Optimization Hooks
5375//===----------------------------------------------------------------------===//
5376
5377static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
5378 SDValue Operand, SelectionDAG &DAG,
5379 int &ExtraSteps) {
5380 EVT VT = Operand.getValueType();
5381 if (ST->hasNEON() &&
5382 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
5383 VT == MVT::f32 || VT == MVT::v1f32 ||
5384 VT == MVT::v2f32 || VT == MVT::v4f32)) {
5385 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
5386 // For the reciprocal estimates, convergence is quadratic, so the number
5387 // of digits is doubled after each iteration. In ARMv8, the accuracy of
5388 // the initial estimate is 2^-8. Thus the number of extra steps to refine
5389 // the result for float (23 mantissa bits) is 2 and for double (52
5390 // mantissa bits) is 3.
5391 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
5392
5393 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
5394 }
5395
5396 return SDValue();
5397}
5398
5399SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
5400 SelectionDAG &DAG, int Enabled,
5401 int &ExtraSteps,
5402 bool &UseOneConst,
5403 bool Reciprocal) const {
5404 if (Enabled == ReciprocalEstimate::Enabled ||
5405 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5406 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5407 DAG, ExtraSteps)) {
5408 SDLoc DL(Operand);
5409 EVT VT = Operand.getValueType();
5410
5411 SDNodeFlags Flags;
5412 Flags.setAllowReassociation(true);
5413
5414 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
5415 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
5416 for (int i = ExtraSteps; i > 0; --i) {
5417 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
5418 Flags);
5419 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
5420 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5421 }
5422 if (!Reciprocal) {
5423 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5424 VT);
5425 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5426 SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5427
5428 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5429 // Correct the result if the operand is 0.0.
5430 Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5431 VT, Eq, Operand, Estimate);
5432 }
5433
5434 ExtraSteps = 0;
5435 return Estimate;
5436 }
5437
5438 return SDValue();
5439}
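// A scalar sketch of the Newton refinement loop above (illustrative only;
// FRSQRTE/FRSQRTS perform the estimate and the 0.5*(3 - M*N) step):
//
//   float e = frsqrte(x);            // ~2^-8 accurate estimate of 1/sqrt(x)
//   for (int i = 0; i < ExtraSteps; ++i)
//     e = e * frsqrts(x, e * e);     // frsqrts(a, b) == 0.5f * (3.0f - a*b)
//   // For sqrt(x) itself (Reciprocal == false): result = x * e, with the
//   // x == 0.0 input selected back to 0.0 afterwards.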
5440
5441SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5442 SelectionDAG &DAG, int Enabled,
5443 int &ExtraSteps) const {
5444 if (Enabled == ReciprocalEstimate::Enabled)
5445 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5446 DAG, ExtraSteps)) {
5447 SDLoc DL(Operand);
5448 EVT VT = Operand.getValueType();
5449
5450 SDNodeFlags Flags;
5451 Flags.setAllowReassociation(true);
5452
5453 // Newton reciprocal iteration: E * (2 - X * E)
5454 // AArch64 reciprocal iteration instruction: (2 - M * N)
5455 for (int i = ExtraSteps; i > 0; --i) {
5456 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5457 Estimate, Flags);
5458 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5459 }
5460
5461 ExtraSteps = 0;
5462 return Estimate;
5463 }
5464
5465 return SDValue();
5466}
5467
5468//===----------------------------------------------------------------------===//
5469// AArch64 Inline Assembly Support
5470//===----------------------------------------------------------------------===//
5471
5472// Table of Constraints
5473// TODO: This is the current set of constraints supported by ARM for the
5474  // compiler; not all of them may make sense.
5475//
5476// r - A general register
5477// w - An FP/SIMD register of some size in the range v0-v31
5478// x - An FP/SIMD register of some size in the range v0-v15
5479// I - Constant that can be used with an ADD instruction
5480// J - Constant that can be used with a SUB instruction
5481// K - Constant that can be used with a 32-bit logical instruction
5482// L - Constant that can be used with a 64-bit logical instruction
5483// M - Constant that can be used as a 32-bit MOV immediate
5484// N - Constant that can be used as a 64-bit MOV immediate
5485// Q - A memory reference with base register and no offset
5486// S - A symbolic address
5487// Y - Floating point constant zero
5488// Z - Integer constant zero
5489//
5490// Note that general register operands will be output using their 64-bit x
5491// register name, whatever the size of the variable, unless the asm operand
5492// is prefixed by the %w modifier. Floating-point and SIMD register operands
5493// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5494// %q modifier.
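// Usage sketch (illustrative, not taken from this file): these constraints
// surface through GCC-style inline assembly, e.g.
//
//   long res;
//   asm("add %0, %1, %2" : "=r"(res) : "r"(a), "I"(1));
//
// where "r" asks for a general register and "I" requires an ADD-style
// immediate (0-4095, optionally shifted left by 12).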
5495const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5496 // At this point, we have to lower this constraint to something else, so we
5497 // lower it to an "r" or "w". However, by doing this we will force the result
5498 // to be in register, while the X constraint is much more permissive.
5499 //
5500 // Although we are correct (we are free to emit anything, without
5501 // constraints), we might break use cases that would expect us to be more
5502 // efficient and emit something else.
5503 if (!Subtarget->hasFPARMv8())
5504 return "r";
5505
5506 if (ConstraintVT.isFloatingPoint())
5507 return "w";
5508
5509 if (ConstraintVT.isVector() &&
5510 (ConstraintVT.getSizeInBits() == 64 ||
5511 ConstraintVT.getSizeInBits() == 128))
5512 return "w";
5513
5514 return "r";
5515}
5516
5517/// getConstraintType - Given a constraint letter, return the type of
5518/// constraint it is for this target.
5519AArch64TargetLowering::ConstraintType
5520AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5521 if (Constraint.size() == 1) {
5522 switch (Constraint[0]) {
5523 default:
5524 break;
5525 case 'z':
5526 return C_Other;
5527 case 'x':
5528 case 'w':
5529 return C_RegisterClass;
5530 // An address with a single base register. Due to the way we
5531 // currently handle addresses it is the same as 'r'.
5532 case 'Q':
5533 return C_Memory;
5534 case 'S': // A symbolic address
5535 return C_Other;
5536 }
5537 }
5538 return TargetLowering::getConstraintType(Constraint);
5539}
5540
5541/// Examine constraint type and operand type and determine a weight value.
5542/// This object must already have been set up with the operand type
5543/// and the current alternative constraint selected.
5544TargetLowering::ConstraintWeight
5545AArch64TargetLowering::getSingleConstraintMatchWeight(
5546 AsmOperandInfo &info, const char *constraint) const {
5547 ConstraintWeight weight = CW_Invalid;
5548 Value *CallOperandVal = info.CallOperandVal;
5549 // If we don't have a value, we can't do a match,
5550 // but allow it at the lowest weight.
5551 if (!CallOperandVal)
5552 return CW_Default;
5553 Type *type = CallOperandVal->getType();
5554 // Look at the constraint type.
5555 switch (*constraint) {
5556 default:
5557 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5558 break;
5559 case 'x':
5560 case 'w':
5561 if (type->isFloatingPointTy() || type->isVectorTy())
5562 weight = CW_Register;
5563 break;
5564 case 'z':
5565 weight = CW_Constant;
5566 break;
5567 }
5568 return weight;
5569}
5570
5571std::pair<unsigned, const TargetRegisterClass *>
5572AArch64TargetLowering::getRegForInlineAsmConstraint(
5573 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5574 if (Constraint.size() == 1) {
5575 switch (Constraint[0]) {
5576 case 'r':
5577 if (VT.getSizeInBits() == 64)
5578 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5579 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5580 case 'w':
5581 if (!Subtarget->hasFPARMv8())
5582 break;
5583 if (VT.getSizeInBits() == 16)
5584 return std::make_pair(0U, &AArch64::FPR16RegClass);
5585 if (VT.getSizeInBits() == 32)
5586 return std::make_pair(0U, &AArch64::FPR32RegClass);
5587 if (VT.getSizeInBits() == 64)
5588 return std::make_pair(0U, &AArch64::FPR64RegClass);
5589 if (VT.getSizeInBits() == 128)
5590 return std::make_pair(0U, &AArch64::FPR128RegClass);
5591 break;
5592 // The instructions that this constraint is designed for can
5593 // only take 128-bit registers so just use that regclass.
5594 case 'x':
5595 if (!Subtarget->hasFPARMv8())
5596 break;
5597 if (VT.getSizeInBits() == 128)
5598 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5599 break;
5600 }
5601 }
5602 if (StringRef("{cc}").equals_lower(Constraint))
5603 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5604
5605 // Use the default implementation in TargetLowering to convert the register
5606 // constraint into a member of a register class.
5607 std::pair<unsigned, const TargetRegisterClass *> Res;
5608 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5609
5610 // Not found as a standard register?
5611 if (!Res.second) {
5612 unsigned Size = Constraint.size();
5613 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5614 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5615 int RegNo;
5616 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5617 if (!Failed && RegNo >= 0 && RegNo <= 31) {
5618 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5619 // By default we'll emit v0-v31 for this unless there's a modifier where
5620 // we'll emit the correct register as well.
5621 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5622 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5623 Res.second = &AArch64::FPR64RegClass;
5624 } else {
5625 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5626 Res.second = &AArch64::FPR128RegClass;
5627 }
5628 }
5629 }
5630 }
5631
5632 if (Res.second && !Subtarget->hasFPARMv8() &&
5633 !AArch64::GPR32allRegClass.hasSubClassEq(Res.second) &&
5634 !AArch64::GPR64allRegClass.hasSubClassEq(Res.second))
5635 return std::make_pair(0U, nullptr);
5636
5637 return Res;
5638}
5639
5640/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5641/// vector. If it is invalid, don't add anything to Ops.
5642void AArch64TargetLowering::LowerAsmOperandForConstraint(
5643 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5644 SelectionDAG &DAG) const {
5645 SDValue Result;
5646
5647 // Currently only support length 1 constraints.
5648 if (Constraint.length() != 1)
5649 return;
5650
5651 char ConstraintLetter = Constraint[0];
5652 switch (ConstraintLetter) {
5653 default:
5654 break;
5655
5656  // This set of constraints deals with valid constants for various instructions.
5657 // Validate and return a target constant for them if we can.
5658 case 'z': {
5659 // 'z' maps to xzr or wzr so it needs an input of 0.
5660 if (!isNullConstant(Op))
5661 return;
5662
5663 if (Op.getValueType() == MVT::i64)
5664 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5665 else
5666 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5667 break;
5668 }
5669 case 'S': {
5670 // An absolute symbolic address or label reference.
5671 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5672 Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5673 GA->getValueType(0));
5674 } else if (const BlockAddressSDNode *BA =
5675 dyn_cast<BlockAddressSDNode>(Op)) {
5676 Result =
5677 DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
5678 } else if (const ExternalSymbolSDNode *ES =
5679 dyn_cast<ExternalSymbolSDNode>(Op)) {
5680 Result =
5681 DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0));
5682 } else
5683 return;
5684 break;
5685 }
5686
5687 case 'I':
5688 case 'J':
5689 case 'K':
5690 case 'L':
5691 case 'M':
5692 case 'N':
5693 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5694 if (!C)
5695 return;
5696
5697 // Grab the value and do some validation.
5698 uint64_t CVal = C->getZExtValue();
5699 switch (ConstraintLetter) {
5700 // The I constraint applies only to simple ADD or SUB immediate operands:
5701 // i.e. 0 to 4095 with optional shift by 12
5702 // The J constraint applies only to ADD or SUB immediates that would be
5703 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5704 // instruction [or vice versa], in other words -1 to -4095 with optional
5705 // left shift by 12.
5706 case 'I':
5707 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5708 break;
5709 return;
5710 case 'J': {
5711 uint64_t NVal = -C->getSExtValue();
5712 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5713 CVal = C->getSExtValue();
5714 break;
5715 }
5716 return;
5717 }
5718 // The K and L constraints apply *only* to logical immediates, including
5719 // what used to be the MOVI alias for ORR (though the MOVI alias has now
5720 // been removed and MOV should be used). So these constraints have to
5721 // distinguish between bit patterns that are valid 32-bit or 64-bit
5722 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5723 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5724 // versa.
5725 case 'K':
5726 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5727 break;
5728 return;
5729 case 'L':
5730 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5731 break;
5732 return;
5733 // The M and N constraints are a superset of K and L respectively, for use
5734 // with the MOV (immediate) alias. As well as the logical immediates they
5735 // also match 32 or 64-bit immediates that can be loaded either using a
5736  // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5737 // (M) or 64-bit 0x1234000000000000 (N) etc.
5738  // As a note, some of this code is liberally stolen from the asm parser.
5739 case 'M': {
5740 if (!isUInt<32>(CVal))
5741 return;
5742 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5743 break;
5744 if ((CVal & 0xFFFF) == CVal)
5745 break;
5746 if ((CVal & 0xFFFF0000ULL) == CVal)
5747 break;
5748 uint64_t NCVal = ~(uint32_t)CVal;
5749 if ((NCVal & 0xFFFFULL) == NCVal)
5750 break;
5751 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5752 break;
5753 return;
5754 }
5755 case 'N': {
5756 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5757 break;
5758 if ((CVal & 0xFFFFULL) == CVal)
5759 break;
5760 if ((CVal & 0xFFFF0000ULL) == CVal)
5761 break;
5762 if ((CVal & 0xFFFF00000000ULL) == CVal)
5763 break;
5764 if ((CVal & 0xFFFF000000000000ULL) == CVal)
5765 break;
5766 uint64_t NCVal = ~CVal;
5767 if ((NCVal & 0xFFFFULL) == NCVal)
5768 break;
5769 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5770 break;
5771 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5772 break;
5773 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5774 break;
5775 return;
5776 }
5777 default:
5778 return;
5779 }
5780
5781 // All assembler immediates are 64-bit integers.
5782 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5783 break;
5784 }
5785
5786 if (Result.getNode()) {
5787 Ops.push_back(Result);
5788 return;
5789 }
5790
5791 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5792}
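// Worked examples for the immediate constraints handled above (illustrative
// only):
//   'I' : 4095 and 0xfff000 (4095 << 12) are accepted; 4097 is not.
//   'J' : negated ADD/SUB immediates such as -42 or -4095.
//   'K' : 0xaaaaaaaa is a valid 32-bit logical immediate; 'L' instead needs
//         the 64-bit pattern 0xaaaaaaaaaaaaaaaa.
//   'M' : additionally accepts single-MOVZ/MOVN values such as 0x12340000.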
5793
5794//===----------------------------------------------------------------------===//
5795// AArch64 Advanced SIMD Support
5796//===----------------------------------------------------------------------===//
5797
5798/// WidenVector - Given a value in the V64 register class, produce the
5799/// equivalent value in the V128 register class.
5800static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5801 EVT VT = V64Reg.getValueType();
5802 unsigned NarrowSize = VT.getVectorNumElements();
5803 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5804 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5805 SDLoc DL(V64Reg);
5806
5807 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5808 V64Reg, DAG.getConstant(0, DL, MVT::i32));
5809}
5810
5811/// getExtFactor - Determine the adjustment factor for the position when
5812/// generating an "extract from vector registers" instruction.
5813static unsigned getExtFactor(SDValue &V) {
5814 EVT EltType = V.getValueType().getVectorElementType();
5815 return EltType.getSizeInBits() / 8;
5816}
5817
5818/// NarrowVector - Given a value in the V128 register class, produce the
5819/// equivalent value in the V64 register class.
5820static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5821 EVT VT = V128Reg.getValueType();
5822 unsigned WideSize = VT.getVectorNumElements();
5823 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5824 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5825 SDLoc DL(V128Reg);
5826
5827 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5828}
5829
5830// Gather data to see if the operation can be modelled as a
5831// shuffle in combination with VEXTs.
5832SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5833 SelectionDAG &DAG) const {
5834  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5835  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5836 SDLoc dl(Op);
5837 EVT VT = Op.getValueType();
5838 unsigned NumElts = VT.getVectorNumElements();
5839
5840 struct ShuffleSourceInfo {
5841 SDValue Vec;
5842 unsigned MinElt;
5843 unsigned MaxElt;
5844
5845 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5846 // be compatible with the shuffle we intend to construct. As a result
5847 // ShuffleVec will be some sliding window into the original Vec.
5848 SDValue ShuffleVec;
5849
5850    // Code should guarantee that element i in Vec starts at element
5851    // "WindowBase + i * WindowScale" in ShuffleVec.
5852 int WindowBase;
5853 int WindowScale;
5854
5855 ShuffleSourceInfo(SDValue Vec)
5856 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5857 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5858
5859 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5860 };
5861
5862 // First gather all vectors used as an immediate source for this BUILD_VECTOR
5863 // node.
5864 SmallVector<ShuffleSourceInfo, 2> Sources;
5865 for (unsigned i = 0; i < NumElts; ++i) {
5866 SDValue V = Op.getOperand(i);
5867 if (V.isUndef())
5868 continue;
5869 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5870 !isa<ConstantSDNode>(V.getOperand(1))) {
5871      LLVM_DEBUG(
5872          dbgs() << "Reshuffle failed: "
5873                    "a shuffle can only come from building a vector from "
5874                    "various elements of other vectors, provided their "
5875                    "indices are constant\n");
5876 return SDValue();
5877 }
5878
5879 // Add this element source to the list if it's not already there.
5880 SDValue SourceVec = V.getOperand(0);
5881 auto Source = find(Sources, SourceVec);
5882 if (Source == Sources.end())
5883 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5884
5885 // Update the minimum and maximum lane number seen.
5886 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5887 Source->MinElt = std::min(Source->MinElt, EltNo);
5888 Source->MaxElt = std::max(Source->MaxElt, EltNo);
5889 }
5890
5891 if (Sources.size() > 2) {
5892    LLVM_DEBUG(
5893        dbgs() << "Reshuffle failed: currently only do something sane when at "
5894                  "most two source vectors are involved\n");
5895 return SDValue();
5896 }
5897
5898 // Find out the smallest element size among result and two sources, and use
5899 // it as element size to build the shuffle_vector.
5900 EVT SmallestEltTy = VT.getVectorElementType();
5901 for (auto &Source : Sources) {
5902 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5903 if (SrcEltTy.bitsLT(SmallestEltTy)) {
5904 SmallestEltTy = SrcEltTy;
5905 }
5906 }
5907 unsigned ResMultiplier =
5908 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5909 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5910 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5911
5912 // If the source vector is too wide or too narrow, we may nevertheless be able
5913 // to construct a compatible shuffle either by concatenating it with UNDEF or
5914 // extracting a suitable range of elements.
5915 for (auto &Src : Sources) {
5916 EVT SrcVT = Src.ShuffleVec.getValueType();
5917
5918 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
5919 continue;
5920
5921 // This stage of the search produces a source with the same element type as
5922 // the original, but with a total width matching the BUILD_VECTOR output.
5923 EVT EltVT = SrcVT.getVectorElementType();
5924 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
5925 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
5926
5927 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
5928      assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
5929 // We can pad out the smaller vector for free, so if it's part of a
5930 // shuffle...
5931 Src.ShuffleVec =
5932 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
5933 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
5934 continue;
5935 }
5936
5937    assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
5938
5939 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
5940      LLVM_DEBUG(
5941          dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
5942 return SDValue();
5943 }
5944
5945 if (Src.MinElt >= NumSrcElts) {
5946 // The extraction can just take the second half
5947 Src.ShuffleVec =
5948 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5949 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5950 Src.WindowBase = -NumSrcElts;
5951 } else if (Src.MaxElt < NumSrcElts) {
5952 // The extraction can just take the first half
5953 Src.ShuffleVec =
5954 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5955 DAG.getConstant(0, dl, MVT::i64));
5956 } else {
5957 // An actual VEXT is needed