Bug Summary

File: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 9551, column 48
The result of the left shift is undefined due to shifting by '64', which is greater than or equal to the width of type 'unsigned long long'
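
In plain terms, the analyzer is reporting the shift-exceeds-width class of undefined behaviour: in C++, shifting a value by an amount greater than or equal to the width of its type is undefined, so any path on which a 64-bit shift amount can reach 64 is flagged. The sketch below is a minimal, hypothetical illustration of the pattern and a common guard; it is not the code at line 9551, which lies outside the excerpt reproduced here.

#include <cassert>
#include <cstdint>

// Hypothetical example of the flagged pattern: building a low-bit mask.
// (1ULL << NumBits) is undefined behaviour when NumBits == 64, because the
// shift amount equals the width of unsigned long long.
uint64_t maskLowBits(unsigned NumBits) {
  assert(NumBits <= 64 && "shift amount exceeds type width");
  // Guarded form: handle the full-width case explicitly so the shift
  // amount stays strictly below the type width.
  return NumBits == 64 ? ~0ULL : ((1ULL << NumBits) - 1);
}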

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/StringSwitch.h"
31#include "llvm/ADT/Triple.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/CallingConvLower.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineInstr.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/IntrinsicInst.h"
60#include "llvm/IR/Intrinsics.h"
61#include "llvm/IR/IntrinsicsAArch64.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/OperandTraits.h"
64#include "llvm/IR/PatternMatch.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/Use.h"
67#include "llvm/IR/Value.h"
68#include "llvm/MC/MCRegisterInfo.h"
69#include "llvm/Support/Casting.h"
70#include "llvm/Support/CodeGen.h"
71#include "llvm/Support/CommandLine.h"
72#include "llvm/Support/Compiler.h"
73#include "llvm/Support/Debug.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/KnownBits.h"
76#include "llvm/Support/MachineValueType.h"
77#include "llvm/Support/MathExtras.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Target/TargetMachine.h"
80#include "llvm/Target/TargetOptions.h"
81#include <algorithm>
82#include <bitset>
83#include <cassert>
84#include <cctype>
85#include <cstdint>
86#include <cstdlib>
87#include <iterator>
88#include <limits>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace llvm::PatternMatch;
95
96#define DEBUG_TYPE "aarch64-lower"
97
98STATISTIC(NumTailCalls, "Number of tail calls");
99STATISTIC(NumShiftInserts, "Number of vector shift inserts");
100STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
101
102static cl::opt<bool>
103EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
104 cl::desc("Allow AArch64 SLI/SRI formation"),
105 cl::init(false));
106
107// FIXME: The necessary dtprel relocations don't seem to be supported
108// well in the GNU bfd and gold linkers at the moment. Therefore, by
109// default, for now, fall back to GeneralDynamic code generation.
110cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
111 "aarch64-elf-ldtls-generation", cl::Hidden,
112 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
113 cl::init(false));
114
115static cl::opt<bool>
116EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
117 cl::desc("Enable AArch64 logical imm instruction "
118 "optimization"),
119 cl::init(true));
120
121/// Value type used for condition codes.
122static const MVT MVT_CC = MVT::i32;
123
124AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
125 const AArch64Subtarget &STI)
126 : TargetLowering(TM), Subtarget(&STI) {
127 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
128 // we have to make something up. Arbitrarily, choose ZeroOrOne.
129 setBooleanContents(ZeroOrOneBooleanContent);
130 // When comparing vectors the result sets the different elements in the
131 // vector to all-one or all-zero.
132 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
133
134 // Set up the register classes.
135 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
136 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
137
138 if (Subtarget->hasFPARMv8()) {
139 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
140 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
141 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
142 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
143 }
144
145 if (Subtarget->hasNEON()) {
146 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
147 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
148 // Someone set us up the NEON.
149 addDRTypeForNEON(MVT::v2f32);
150 addDRTypeForNEON(MVT::v8i8);
151 addDRTypeForNEON(MVT::v4i16);
152 addDRTypeForNEON(MVT::v2i32);
153 addDRTypeForNEON(MVT::v1i64);
154 addDRTypeForNEON(MVT::v1f64);
155 addDRTypeForNEON(MVT::v4f16);
156
157 addQRTypeForNEON(MVT::v4f32);
158 addQRTypeForNEON(MVT::v2f64);
159 addQRTypeForNEON(MVT::v16i8);
160 addQRTypeForNEON(MVT::v8i16);
161 addQRTypeForNEON(MVT::v4i32);
162 addQRTypeForNEON(MVT::v2i64);
163 addQRTypeForNEON(MVT::v8f16);
164 }
165
166 if (Subtarget->hasSVE()) {
167 // Add legal sve predicate types
168 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
169 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
170 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
171 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
172
173 // Add legal sve data types
174 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
175 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
176 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
177 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
178
179 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
180 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
181 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
182 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
183 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
184 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
185
186 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
187 setOperationAction(ISD::SADDSAT, VT, Legal);
188 setOperationAction(ISD::UADDSAT, VT, Legal);
189 setOperationAction(ISD::SSUBSAT, VT, Legal);
190 setOperationAction(ISD::USUBSAT, VT, Legal);
191 }
192
193 for (auto VT :
194 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
195 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
196 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
197 }
198
199 // Compute derived properties from the register classes
200 computeRegisterProperties(Subtarget->getRegisterInfo());
201
202 // Provide all sorts of operation actions
203 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
204 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
205 setOperationAction(ISD::SETCC, MVT::i32, Custom);
206 setOperationAction(ISD::SETCC, MVT::i64, Custom);
207 setOperationAction(ISD::SETCC, MVT::f16, Custom);
208 setOperationAction(ISD::SETCC, MVT::f32, Custom);
209 setOperationAction(ISD::SETCC, MVT::f64, Custom);
210 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
211 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
212 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
213 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
214 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
215 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
216 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
217 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
218 setOperationAction(ISD::SELECT, MVT::i32, Custom);
219 setOperationAction(ISD::SELECT, MVT::i64, Custom);
220 setOperationAction(ISD::SELECT, MVT::f16, Custom);
221 setOperationAction(ISD::SELECT, MVT::f32, Custom);
222 setOperationAction(ISD::SELECT, MVT::f64, Custom);
223 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
224 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
225 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
226 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
227 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
228 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
229 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
230
231 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
232 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
233 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
234
235 setOperationAction(ISD::FREM, MVT::f32, Expand);
236 setOperationAction(ISD::FREM, MVT::f64, Expand);
237 setOperationAction(ISD::FREM, MVT::f80, Expand);
238
239 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
240
241 // Custom lowering hooks are needed for XOR
242 // to fold it into CSINC/CSINV.
243 setOperationAction(ISD::XOR, MVT::i32, Custom);
244 setOperationAction(ISD::XOR, MVT::i64, Custom);
245
246 // Virtually no operation on f128 is legal, but LLVM can't expand them when
247 // there's a valid register class, so we need custom operations in most cases.
248 setOperationAction(ISD::FABS, MVT::f128, Expand);
249 setOperationAction(ISD::FADD, MVT::f128, Custom);
250 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
251 setOperationAction(ISD::FCOS, MVT::f128, Expand);
252 setOperationAction(ISD::FDIV, MVT::f128, Custom);
253 setOperationAction(ISD::FMA, MVT::f128, Expand);
254 setOperationAction(ISD::FMUL, MVT::f128, Custom);
255 setOperationAction(ISD::FNEG, MVT::f128, Expand);
256 setOperationAction(ISD::FPOW, MVT::f128, Expand);
257 setOperationAction(ISD::FREM, MVT::f128, Expand);
258 setOperationAction(ISD::FRINT, MVT::f128, Expand);
259 setOperationAction(ISD::FSIN, MVT::f128, Expand);
260 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
261 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
262 setOperationAction(ISD::FSUB, MVT::f128, Custom);
263 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
264 setOperationAction(ISD::SETCC, MVT::f128, Custom);
265 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
266 setOperationAction(ISD::SELECT, MVT::f128, Custom);
267 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
268 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
269
270 // Lowering for many of the conversions is actually specified by the non-f128
271 // type. The LowerXXX function will be trivial when f128 isn't involved.
272 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
273 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
274 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
275 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
276 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
277 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
278 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
279 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
280 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
281 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
282 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
284 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
285 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
286
287 // Variable arguments.
288 setOperationAction(ISD::VASTART, MVT::Other, Custom);
289 setOperationAction(ISD::VAARG, MVT::Other, Custom);
290 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
291 setOperationAction(ISD::VAEND, MVT::Other, Expand);
292
293 // Variable-sized objects.
294 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
295 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
296
297 if (Subtarget->isTargetWindows())
298 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
299 else
300 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
301
302 // Constant pool entries
303 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
304
305 // BlockAddress
306 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
307
308 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
309 setOperationAction(ISD::ADDC, MVT::i32, Custom);
310 setOperationAction(ISD::ADDE, MVT::i32, Custom);
311 setOperationAction(ISD::SUBC, MVT::i32, Custom);
312 setOperationAction(ISD::SUBE, MVT::i32, Custom);
313 setOperationAction(ISD::ADDC, MVT::i64, Custom);
314 setOperationAction(ISD::ADDE, MVT::i64, Custom);
315 setOperationAction(ISD::SUBC, MVT::i64, Custom);
316 setOperationAction(ISD::SUBE, MVT::i64, Custom);
317
318 // AArch64 lacks both left-rotate and popcount instructions.
319 setOperationAction(ISD::ROTL, MVT::i32, Expand);
320 setOperationAction(ISD::ROTL, MVT::i64, Expand);
321 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
322 setOperationAction(ISD::ROTL, VT, Expand);
323 setOperationAction(ISD::ROTR, VT, Expand);
324 }
325
326 // AArch64 doesn't have {U|S}MUL_LOHI.
327 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
328 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
329
330 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
331 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
332
333 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
334 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
335 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
336 setOperationAction(ISD::SDIVREM, VT, Expand);
337 setOperationAction(ISD::UDIVREM, VT, Expand);
338 }
339 setOperationAction(ISD::SREM, MVT::i32, Expand);
340 setOperationAction(ISD::SREM, MVT::i64, Expand);
341 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
342 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
343 setOperationAction(ISD::UREM, MVT::i32, Expand);
344 setOperationAction(ISD::UREM, MVT::i64, Expand);
345
346 // Custom lower Add/Sub/Mul with overflow.
347 setOperationAction(ISD::SADDO, MVT::i32, Custom);
348 setOperationAction(ISD::SADDO, MVT::i64, Custom);
349 setOperationAction(ISD::UADDO, MVT::i32, Custom);
350 setOperationAction(ISD::UADDO, MVT::i64, Custom);
351 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
352 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
353 setOperationAction(ISD::USUBO, MVT::i32, Custom);
354 setOperationAction(ISD::USUBO, MVT::i64, Custom);
355 setOperationAction(ISD::SMULO, MVT::i32, Custom);
356 setOperationAction(ISD::SMULO, MVT::i64, Custom);
357 setOperationAction(ISD::UMULO, MVT::i32, Custom);
358 setOperationAction(ISD::UMULO, MVT::i64, Custom);
359
360 setOperationAction(ISD::FSIN, MVT::f32, Expand);
361 setOperationAction(ISD::FSIN, MVT::f64, Expand);
362 setOperationAction(ISD::FCOS, MVT::f32, Expand);
363 setOperationAction(ISD::FCOS, MVT::f64, Expand);
364 setOperationAction(ISD::FPOW, MVT::f32, Expand);
365 setOperationAction(ISD::FPOW, MVT::f64, Expand);
366 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
367 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
368 if (Subtarget->hasFullFP16())
369 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
370 else
371 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
372
373 setOperationAction(ISD::FREM, MVT::f16, Promote);
374 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
375 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
376 setOperationAction(ISD::FPOW, MVT::f16, Promote);
377 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
378 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
379 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
380 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
381 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
382 setOperationAction(ISD::FCOS, MVT::f16, Promote);
383 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
384 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
385 setOperationAction(ISD::FSIN, MVT::f16, Promote);
386 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
387 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
388 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
389 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
390 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
391 setOperationAction(ISD::FEXP, MVT::f16, Promote);
392 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
393 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
394 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
395 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
396 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
397 setOperationAction(ISD::FLOG, MVT::f16, Promote);
398 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
399 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
400 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
401 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
402 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
403 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
404 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
405 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
406
407 if (!Subtarget->hasFullFP16()) {
408 setOperationAction(ISD::SELECT, MVT::f16, Promote);
409 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
410 setOperationAction(ISD::SETCC, MVT::f16, Promote);
411 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
412 setOperationAction(ISD::FADD, MVT::f16, Promote);
413 setOperationAction(ISD::FSUB, MVT::f16, Promote);
414 setOperationAction(ISD::FMUL, MVT::f16, Promote);
415 setOperationAction(ISD::FDIV, MVT::f16, Promote);
416 setOperationAction(ISD::FMA, MVT::f16, Promote);
417 setOperationAction(ISD::FNEG, MVT::f16, Promote);
418 setOperationAction(ISD::FABS, MVT::f16, Promote);
419 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
420 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
421 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
422 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
423 setOperationAction(ISD::FRINT, MVT::f16, Promote);
424 setOperationAction(ISD::FROUND, MVT::f16, Promote);
425 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
426 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
427 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
428 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
429 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
430
431 // promote v4f16 to v4f32 when that is known to be safe.
432 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
433 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
434 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
435 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
436 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
437 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
438 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
439 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
440
441 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
442 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
443 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
444 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
445 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
446 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
447 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
448 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
449 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
450 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
451 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
452 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
453 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
454 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
455 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
456
457 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
458 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
459 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
460 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
461 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
462 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
463 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
464 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
465 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
466 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
467 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
468 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
469 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
470 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
471 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
472 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
473 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
474 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
475 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
476 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
477 }
478
479 // AArch64 has implementations of a lot of rounding-like FP operations.
480 for (MVT Ty : {MVT::f32, MVT::f64}) {
481 setOperationAction(ISD::FFLOOR, Ty, Legal);
482 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
483 setOperationAction(ISD::FCEIL, Ty, Legal);
484 setOperationAction(ISD::FRINT, Ty, Legal);
485 setOperationAction(ISD::FTRUNC, Ty, Legal);
486 setOperationAction(ISD::FROUND, Ty, Legal);
487 setOperationAction(ISD::FMINNUM, Ty, Legal);
488 setOperationAction(ISD::FMAXNUM, Ty, Legal);
489 setOperationAction(ISD::FMINIMUM, Ty, Legal);
490 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
491 setOperationAction(ISD::LROUND, Ty, Legal);
492 setOperationAction(ISD::LLROUND, Ty, Legal);
493 setOperationAction(ISD::LRINT, Ty, Legal);
494 setOperationAction(ISD::LLRINT, Ty, Legal);
495 }
496
497 if (Subtarget->hasFullFP16()) {
498 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
499 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
500 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
501 setOperationAction(ISD::FRINT, MVT::f16, Legal);
502 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
503 setOperationAction(ISD::FROUND, MVT::f16, Legal);
504 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
505 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
506 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
507 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
508 }
509
510 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
511
512 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
513
514 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
515 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
516 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
517 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
518 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
519
520 // 128-bit loads and stores can be done without expanding
521 setOperationAction(ISD::LOAD, MVT::i128, Custom);
522 setOperationAction(ISD::STORE, MVT::i128, Custom);
523
524 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
525 // This requires the Performance Monitors extension.
526 if (Subtarget->hasPerfMon())
527 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
528
529 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
530 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
531 // Issue __sincos_stret if available.
532 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
533 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
534 } else {
535 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
536 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
537 }
538
539 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
540 // MSVCRT doesn't have powi; fall back to pow
541 setLibcallName(RTLIB::POWI_F32, nullptr);
542 setLibcallName(RTLIB::POWI_F64, nullptr);
543 }
544
545 // Make floating-point constants legal for the large code model, so they don't
546 // become loads from the constant pool.
547 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
548 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
549 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
550 }
551
552 // AArch64 does not have floating-point extending loads, i1 sign-extending
553 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
554 for (MVT VT : MVT::fp_valuetypes()) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
557 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
558 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
559 }
560 for (MVT VT : MVT::integer_valuetypes())
561 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
562
563 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
564 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
565 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
566 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
567 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
568 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
569 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
570
571 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
572 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
573
574 // Indexed loads and stores are supported.
575 for (unsigned im = (unsigned)ISD::PRE_INC;
576 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
577 setIndexedLoadAction(im, MVT::i8, Legal);
578 setIndexedLoadAction(im, MVT::i16, Legal);
579 setIndexedLoadAction(im, MVT::i32, Legal);
580 setIndexedLoadAction(im, MVT::i64, Legal);
581 setIndexedLoadAction(im, MVT::f64, Legal);
582 setIndexedLoadAction(im, MVT::f32, Legal);
583 setIndexedLoadAction(im, MVT::f16, Legal);
584 setIndexedStoreAction(im, MVT::i8, Legal);
585 setIndexedStoreAction(im, MVT::i16, Legal);
586 setIndexedStoreAction(im, MVT::i32, Legal);
587 setIndexedStoreAction(im, MVT::i64, Legal);
588 setIndexedStoreAction(im, MVT::f64, Legal);
589 setIndexedStoreAction(im, MVT::f32, Legal);
590 setIndexedStoreAction(im, MVT::f16, Legal);
591 }
592
593 // Trap.
594 setOperationAction(ISD::TRAP, MVT::Other, Legal);
595 if (Subtarget->isTargetWindows())
596 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
597
598 // We combine OR nodes for bitfield operations.
599 setTargetDAGCombine(ISD::OR);
600 // Try to create BICs for vector ANDs.
601 setTargetDAGCombine(ISD::AND);
602
603 // Vector add and sub nodes may conceal a high-half opportunity.
604 // Also, try to fold ADD into CSINC/CSINV..
605 setTargetDAGCombine(ISD::ADD);
606 setTargetDAGCombine(ISD::SUB);
607 setTargetDAGCombine(ISD::SRL);
608 setTargetDAGCombine(ISD::XOR);
609 setTargetDAGCombine(ISD::SINT_TO_FP);
610 setTargetDAGCombine(ISD::UINT_TO_FP);
611
612 setTargetDAGCombine(ISD::FP_TO_SINT);
613 setTargetDAGCombine(ISD::FP_TO_UINT);
614 setTargetDAGCombine(ISD::FDIV);
615
616 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
617
618 setTargetDAGCombine(ISD::ANY_EXTEND);
619 setTargetDAGCombine(ISD::ZERO_EXTEND);
620 setTargetDAGCombine(ISD::SIGN_EXTEND);
621 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
622 setTargetDAGCombine(ISD::CONCAT_VECTORS);
623 setTargetDAGCombine(ISD::STORE);
624 if (Subtarget->supportsAddressTopByteIgnored())
625 setTargetDAGCombine(ISD::LOAD);
626
627 setTargetDAGCombine(ISD::MUL);
628
629 setTargetDAGCombine(ISD::SELECT);
630 setTargetDAGCombine(ISD::VSELECT);
631
632 setTargetDAGCombine(ISD::INTRINSIC_VOID);
633 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
634 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
635
636 setTargetDAGCombine(ISD::GlobalAddress);
637
638 // In case of strict alignment, avoid an excessive number of byte wide stores.
639 MaxStoresPerMemsetOptSize = 8;
640 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
641 ? MaxStoresPerMemsetOptSize : 32;
642
643 MaxGluedStoresPerMemcpy = 4;
644 MaxStoresPerMemcpyOptSize = 4;
645 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
646 ? MaxStoresPerMemcpyOptSize : 16;
647
648 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
649
650 MaxLoadsPerMemcmpOptSize = 4;
651 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
652 ? MaxLoadsPerMemcmpOptSize : 8;
653
654 setStackPointerRegisterToSaveRestore(AArch64::SP);
655
656 setSchedulingPreference(Sched::Hybrid);
657
658 EnableExtLdPromotion = true;
659
660 // Set required alignment.
661 setMinFunctionAlignment(Align(4));
662 // Set preferred alignments.
663 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
664 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
665
666 // Only change the limit for entries in a jump table if specified by
667 // the sub target, but not at the command line.
668 unsigned MaxJT = STI.getMaximumJumpTableSize();
669 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
670 setMaximumJumpTableSize(MaxJT);
671
672 setHasExtractBitsInsn(true);
673
674 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
675
676 if (Subtarget->hasNEON()) {
677 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
678 // silliness like this:
679 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
680 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
681 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
682 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
683 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
684 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
685 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
686 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
687 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
688 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
689 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
690 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
691 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
692 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
693 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
694 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
695 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
696 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
697 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
698 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
699 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
700 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
701 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
702 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
703 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
704
705 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
706 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
707 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
708 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
709 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
710
711 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
712
713 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
714 // elements smaller than i32, so promote the input to i32 first.
715 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
716 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
717 // i8 vector elements also need promotion to i32 for v8i8
718 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
719 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
720 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
721 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
722 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
723 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
724 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
725 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
726 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
727 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
728 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
729
730 if (Subtarget->hasFullFP16()) {
731 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
732 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
733 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
734 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
735 } else {
736 // when AArch64 doesn't have fullfp16 support, promote the input
737 // to i32 first.
738 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
739 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
740 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
741 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
742 }
743
744 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
745 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
746
747 // AArch64 doesn't have MUL.2d:
748 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
749 // Custom handling for some quad-vector types to detect MULL.
750 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
751 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
752 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
753
754 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
755 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
756 // Vector reductions
757 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
758 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
759 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
760 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
761 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
762
763 // Saturates
764 setOperationAction(ISD::SADDSAT, VT, Legal);
765 setOperationAction(ISD::UADDSAT, VT, Legal);
766 setOperationAction(ISD::SSUBSAT, VT, Legal);
767 setOperationAction(ISD::USUBSAT, VT, Legal);
768 }
769 for (MVT VT : { MVT::v4f16, MVT::v2f32,
770 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
771 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
772 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
773 }
774
775 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
776 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
777 // Likewise, narrowing and extending vector loads/stores aren't handled
778 // directly.
779 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
780 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
781
782 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
783 setOperationAction(ISD::MULHS, VT, Legal);
784 setOperationAction(ISD::MULHU, VT, Legal);
785 } else {
786 setOperationAction(ISD::MULHS, VT, Expand);
787 setOperationAction(ISD::MULHU, VT, Expand);
788 }
789 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
790 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
791
792 setOperationAction(ISD::BSWAP, VT, Expand);
793 setOperationAction(ISD::CTTZ, VT, Expand);
794
795 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
796 setTruncStoreAction(VT, InnerVT, Expand);
797 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
798 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
799 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
800 }
801 }
802
803 // AArch64 has implementations of a lot of rounding-like FP operations.
804 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
805 setOperationAction(ISD::FFLOOR, Ty, Legal);
806 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
807 setOperationAction(ISD::FCEIL, Ty, Legal);
808 setOperationAction(ISD::FRINT, Ty, Legal);
809 setOperationAction(ISD::FTRUNC, Ty, Legal);
810 setOperationAction(ISD::FROUND, Ty, Legal);
811 }
812
813 if (Subtarget->hasFullFP16()) {
814 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
815 setOperationAction(ISD::FFLOOR, Ty, Legal);
816 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
817 setOperationAction(ISD::FCEIL, Ty, Legal);
818 setOperationAction(ISD::FRINT, Ty, Legal);
819 setOperationAction(ISD::FTRUNC, Ty, Legal);
820 setOperationAction(ISD::FROUND, Ty, Legal);
821 }
822 }
823
824 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
825 }
826
827 if (Subtarget->hasSVE()) {
828 // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
829 // splat of 0 or undef) once vector selects supported in SVE codegen. See
830 // D68877 for more details.
831 for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
832 if (isTypeLegal(VT))
833 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
834 }
835 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
836 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
837 }
838
839 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
840}
841
842void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
843 assert(VT.isVector() && "VT should be a vector type");
844
845 if (VT.isFloatingPoint()) {
846 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
847 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
848 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
849 }
850
851 // Mark vector float intrinsics as expand.
852 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
853 setOperationAction(ISD::FSIN, VT, Expand);
854 setOperationAction(ISD::FCOS, VT, Expand);
855 setOperationAction(ISD::FPOW, VT, Expand);
856 setOperationAction(ISD::FLOG, VT, Expand);
857 setOperationAction(ISD::FLOG2, VT, Expand);
858 setOperationAction(ISD::FLOG10, VT, Expand);
859 setOperationAction(ISD::FEXP, VT, Expand);
860 setOperationAction(ISD::FEXP2, VT, Expand);
861
862 // But we do support custom-lowering for FCOPYSIGN.
863 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
864 }
865
866 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
867 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
868 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
869 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
870 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
871 setOperationAction(ISD::SRA, VT, Custom);
872 setOperationAction(ISD::SRL, VT, Custom);
873 setOperationAction(ISD::SHL, VT, Custom);
874 setOperationAction(ISD::OR, VT, Custom);
875 setOperationAction(ISD::SETCC, VT, Custom);
876 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
877
878 setOperationAction(ISD::SELECT, VT, Expand);
879 setOperationAction(ISD::SELECT_CC, VT, Expand);
880 setOperationAction(ISD::VSELECT, VT, Expand);
881 for (MVT InnerVT : MVT::all_valuetypes())
882 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
883
884 // CNT supports only B element sizes, then use UADDLP to widen.
885 if (VT != MVT::v8i8 && VT != MVT::v16i8)
886 setOperationAction(ISD::CTPOP, VT, Custom);
887
888 setOperationAction(ISD::UDIV, VT, Expand);
889 setOperationAction(ISD::SDIV, VT, Expand);
890 setOperationAction(ISD::UREM, VT, Expand);
891 setOperationAction(ISD::SREM, VT, Expand);
892 setOperationAction(ISD::FREM, VT, Expand);
893
894 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
895 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
896
897 if (!VT.isFloatingPoint())
898 setOperationAction(ISD::ABS, VT, Legal);
899
900 // [SU][MIN|MAX] are available for all NEON types apart from i64.
901 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
902 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
903 setOperationAction(Opcode, VT, Legal);
904
905 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
906 if (VT.isFloatingPoint() &&
907 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
908 for (unsigned Opcode :
909 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
910 setOperationAction(Opcode, VT, Legal);
911
912 if (Subtarget->isLittleEndian()) {
913 for (unsigned im = (unsigned)ISD::PRE_INC;
914 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
915 setIndexedLoadAction(im, VT, Legal);
916 setIndexedStoreAction(im, VT, Legal);
917 }
918 }
919}
920
921void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
922 addRegisterClass(VT, &AArch64::FPR64RegClass);
923 addTypeForNEON(VT, MVT::v2i32);
924}
925
926void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
927 addRegisterClass(VT, &AArch64::FPR128RegClass);
928 addTypeForNEON(VT, MVT::v4i32);
929}
930
931EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
932 EVT VT) const {
933 if (!VT.isVector())
934 return MVT::i32;
935 return VT.changeVectorElementTypeToInteger();
936}
937
938static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
939 const APInt &Demanded,
940 TargetLowering::TargetLoweringOpt &TLO,
941 unsigned NewOpc) {
942 uint64_t OldImm = Imm, NewImm, Enc;
943 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
944
945 // Return if the immediate is already all zeros, all ones, a bimm32 or a
946 // bimm64.
947 if (Imm == 0 || Imm == Mask ||
948 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
949 return false;
950
951 unsigned EltSize = Size;
952 uint64_t DemandedBits = Demanded.getZExtValue();
953
954 // Clear bits that are not demanded.
955 Imm &= DemandedBits;
956
957 while (true) {
958 // The goal here is to set the non-demanded bits in a way that minimizes
959 // the number of switching between 0 and 1. In order to achieve this goal,
960 // we set the non-demanded bits to the value of the preceding demanded bits.
961 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
962 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
963 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
964 // The final result is 0b11000011.
965 uint64_t NonDemandedBits = ~DemandedBits;
966 uint64_t InvertedImm = ~Imm & DemandedBits;
967 uint64_t RotatedImm =
968 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
969 NonDemandedBits;
970 uint64_t Sum = RotatedImm + NonDemandedBits;
971 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
972 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
973 NewImm = (Imm | Ones) & Mask;
974
975 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
976 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
977 // we halve the element size and continue the search.
978 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
979 break;
980
981 // We cannot shrink the element size any further if it is 2-bits.
982 if (EltSize == 2)
983 return false;
984
985 EltSize /= 2;
986 Mask >>= EltSize;
987 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
988
989 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
990 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
991 return false;
992
993 // Merge the upper and lower halves of Imm and DemandedBits.
994 Imm |= Hi;
995 DemandedBits |= DemandedBitsHi;
996 }
997
998 ++NumOptimizedImms;
999
1000 // Replicate the element across the register width.
1001 while (EltSize < Size) {
1002 NewImm |= NewImm << EltSize;
1003 EltSize *= 2;
1004 }
1005
1006 (void)OldImm;
1007 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1008 "demanded bits should never be altered");
1009 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1010
1011 // Create the new constant immediate node.
1012 EVT VT = Op.getValueType();
1013 SDLoc DL(Op);
1014 SDValue New;
1015
1016 // If the new constant immediate is all-zeros or all-ones, let the target
1017 // independent DAG combine optimize this node.
1018 if (NewImm == 0 || NewImm == OrigMask) {
1019 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1020 TLO.DAG.getConstant(NewImm, DL, VT));
1021 // Otherwise, create a machine node so that target independent DAG combine
1022 // doesn't undo this optimization.
1023 } else {
1024 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1025 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1026 New = SDValue(
1027 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1028 }
1029
1030 return TLO.CombineTo(Op, New);
1031}
1032
1033bool AArch64TargetLowering::targetShrinkDemandedConstant(
1034 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
1035 // Delay this optimization to as late as possible.
1036 if (!TLO.LegalOps)
1037 return false;
1038
1039 if (!EnableOptimizeLogicalImm)
1040 return false;
1041
1042 EVT VT = Op.getValueType();
1043 if (VT.isVector())
1044 return false;
1045
1046 unsigned Size = VT.getSizeInBits();
1047 assert((Size == 32 || Size == 64) &&
1048 "i32 or i64 is expected after legalization.");
1049
1050 // Exit early if we demand all bits.
1051 if (Demanded.countPopulation() == Size)
1052 return false;
1053
1054 unsigned NewOpc;
1055 switch (Op.getOpcode()) {
1056 default:
1057 return false;
1058 case ISD::AND:
1059 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1060 break;
1061 case ISD::OR:
1062 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1063 break;
1064 case ISD::XOR:
1065 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1066 break;
1067 }
1068 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1069 if (!C)
1070 return false;
1071 uint64_t Imm = C->getZExtValue();
1072 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
1073}
1074
1075/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1076/// Mask are known to be either zero or one and return them Known.
1077void AArch64TargetLowering::computeKnownBitsForTargetNode(
1078 const SDValue Op, KnownBits &Known,
1079 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1080 switch (Op.getOpcode()) {
1081 default:
1082 break;
1083 case AArch64ISD::CSEL: {
1084 KnownBits Known2;
1085 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1086 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1087 Known.Zero &= Known2.Zero;
1088 Known.One &= Known2.One;
1089 break;
1090 }
1091 case AArch64ISD::LOADgot:
1092 case AArch64ISD::ADDlow: {
1093 if (!Subtarget->isTargetILP32())
1094 break;
1095 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1096 Known.Zero = APInt::getHighBitsSet(64, 32);
1097 break;
1098 }
1099 case ISD::INTRINSIC_W_CHAIN: {
1100 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1101 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1102 switch (IntID) {
1103 default: return;
1104 case Intrinsic::aarch64_ldaxr:
1105 case Intrinsic::aarch64_ldxr: {
1106 unsigned BitWidth = Known.getBitWidth();
1107 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1108 unsigned MemBits = VT.getScalarSizeInBits();
1109 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1110 return;
1111 }
1112 }
1113 break;
1114 }
1115 case ISD::INTRINSIC_WO_CHAIN:
1116 case ISD::INTRINSIC_VOID: {
1117 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1118 switch (IntNo) {
1119 default:
1120 break;
1121 case Intrinsic::aarch64_neon_umaxv:
1122 case Intrinsic::aarch64_neon_uminv: {
1123 // Figure out the datatype of the vector operand. The UMINV instruction
1124 // will zero extend the result, so we can mark as known zero all the
1125 // bits larger than the element datatype. 32-bit or larget doesn't need
1126 // this as those are legal types and will be handled by isel directly.
1127 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1128 unsigned BitWidth = Known.getBitWidth();
1129 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1130 assert(BitWidth >= 8 && "Unexpected width!");
1131 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1132 Known.Zero |= Mask;
1133 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1134 assert(BitWidth >= 16 && "Unexpected width!");
1135 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1136 Known.Zero |= Mask;
1137 }
1138 break;
1139 } break;
1140 }
1141 }
1142 }
1143}
1144
1145MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1146 EVT) const {
1147 return MVT::i64;
1148}
1149
1150bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1151 EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1152 bool *Fast) const {
1153 if (Subtarget->requiresStrictAlign())
1154 return false;
1155
1156 if (Fast) {
1157 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1158 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1159 // See comments in performSTORECombine() for more details about
1160 // these conditions.
1161
1162 // Code that uses clang vector extensions can mark that it
1163 // wants unaligned accesses to be treated as fast by
1164 // underspecifying alignment to be 1 or 2.
1165 Align <= 2 ||
1166
1167 // Disregard v2i64. Memcpy lowering produces those and splitting
1168 // them regresses performance on micro-benchmarks and olden/bh.
1169 VT == MVT::v2i64;
1170 }
1171 return true;
1172}
1173
1174// Same as above but handling LLTs instead.
1175bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1176 LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1177 bool *Fast) const {
1178 if (Subtarget->requiresStrictAlign())
1179 return false;
1180
1181 if (Fast) {
1182 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1183 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1184 Ty.getSizeInBytes() != 16 ||
1185 // See comments in performSTORECombine() for more details about
1186 // these conditions.
1187
1188 // Code that uses clang vector extensions can mark that it
1189 // wants unaligned accesses to be treated as fast by
1190 // underspecifying alignment to be 1 or 2.
1191 Align <= 2 ||
1192
1193 // Disregard v2i64. Memcpy lowering produces those and splitting
1194 // them regresses performance on micro-benchmarks and olden/bh.
1195 Ty == LLT::vector(2, 64);
1196 }
1197 return true;
1198}
1199
1200FastISel *
1201AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1202 const TargetLibraryInfo *libInfo) const {
1203 return AArch64::createFastISel(funcInfo, libInfo);
1204}
1205
1206const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1207 switch ((AArch64ISD::NodeType)Opcode) {
1208 case AArch64ISD::FIRST_NUMBER: break;
1209 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1210 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1211 case AArch64ISD::ADR: return "AArch64ISD::ADR";
1212 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1213 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1214 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1215 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1216 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1217 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1218 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1219 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1220 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1221 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1222 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1223 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1224 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1225 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1226 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1227 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1228 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1229 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1230 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1231 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1232 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1233 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1234 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1235 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1236 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1237 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1238 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1239 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1240 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1241 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1242 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1243 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1244 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1245 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1246 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1247 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1248 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1249 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1250 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1251 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1252 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1253 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1254 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1255 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1256 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1257 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1258 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1259 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1260 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1261 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1262 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1263 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1264 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1265 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1266 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1267 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1268 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1269 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1270 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1271 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1272 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1273 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1274 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1275 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1276 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1277 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1278 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1279 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1280 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1281 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1282 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1283 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1284 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1285 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1286 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1287 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1288 case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED";
1289 case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED";
1290 case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED";
1291 case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED";
1292 case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED";
1293 case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED";
1294 case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
1295 case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
1296 case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
1297 case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
1298 case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
1299 case AArch64ISD::REV: return "AArch64ISD::REV";
1300 case AArch64ISD::TBL: return "AArch64ISD::TBL";
1301 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1302 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1303 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1304 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1305 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1306 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1307 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1308 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1309 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1310 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1311 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1312 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1313 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1314 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1315 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1316 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1317 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1318 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1319 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1320 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1321 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1322 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1323 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1324 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1325 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1326 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1327 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1328 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1329 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1330 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1331 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1332 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1333 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1334 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1335 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1336 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1337 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1338 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1339 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1340 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1341 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1342 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1343 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1344 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1345 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1346 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1347 case AArch64ISD::STG: return "AArch64ISD::STG";
1348 case AArch64ISD::STZG: return "AArch64ISD::STZG";
1349 case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
1350 case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
1351 case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI";
1352 case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
1353 case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
1354 case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
1355 case AArch64ISD::INSR: return "AArch64ISD::INSR";
1356 case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
1357 case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
1358 case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
1359 case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW";
1360 case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW";
1361 case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED";
1362 case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED";
1363 case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
1364 case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
1365 case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
1366 case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
1367 case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
1368 case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
1369 case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
1370 case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
1371 case AArch64ISD::SST1: return "AArch64ISD::SST1";
1372 case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
1373 case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
1374 case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
1375 case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
1376 case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
1377 case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
1378 case AArch64ISD::LDP: return "AArch64ISD::LDP";
1379 case AArch64ISD::STP: return "AArch64ISD::STP";
1380 }
1381 return nullptr;
1382}
1383
1384MachineBasicBlock *
1385AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1386 MachineBasicBlock *MBB) const {
1387 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1388 // phi node:
1389
1390 // OrigBB:
1391 // [... previous instrs leading to comparison ...]
1392 // b.ne TrueBB
1393 // b EndBB
1394 // TrueBB:
1395 // ; Fallthrough
1396 // EndBB:
1397 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1398
1399 MachineFunction *MF = MBB->getParent();
1400 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1401 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1402 DebugLoc DL = MI.getDebugLoc();
1403 MachineFunction::iterator It = ++MBB->getIterator();
1404
1405 Register DestReg = MI.getOperand(0).getReg();
1406 Register IfTrueReg = MI.getOperand(1).getReg();
1407 Register IfFalseReg = MI.getOperand(2).getReg();
1408 unsigned CondCode = MI.getOperand(3).getImm();
1409 bool NZCVKilled = MI.getOperand(4).isKill();
1410
1411 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1412 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1413 MF->insert(It, TrueBB);
1414 MF->insert(It, EndBB);
1415
1416 // Transfer rest of current basic-block to EndBB
1417 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1418 MBB->end());
1419 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1420
1421 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1422 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1423 MBB->addSuccessor(TrueBB);
1424 MBB->addSuccessor(EndBB);
1425
1426 // TrueBB falls through to the end.
1427 TrueBB->addSuccessor(EndBB);
1428
1429 if (!NZCVKilled) {
1430 TrueBB->addLiveIn(AArch64::NZCV);
1431 EndBB->addLiveIn(AArch64::NZCV);
1432 }
1433
1434 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1435 .addReg(IfTrueReg)
1436 .addMBB(TrueBB)
1437 .addReg(IfFalseReg)
1438 .addMBB(MBB);
1439
1440 MI.eraseFromParent();
1441 return EndBB;
1442}
1443
1444MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
1445 MachineInstr &MI, MachineBasicBlock *BB) const {
1446 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
1447 BB->getParent()->getFunction().getPersonalityFn())) &&
1448 "SEH does not use catchret!");
1449 return BB;
1450}
1451
1452MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
1453 MachineInstr &MI, MachineBasicBlock *BB) const {
1454 MI.eraseFromParent();
1455 return BB;
1456}
1457
1458MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1459 MachineInstr &MI, MachineBasicBlock *BB) const {
1460 switch (MI.getOpcode()) {
1461 default:
1462#ifndef NDEBUG
1463 MI.dump();
1464#endif
1465 llvm_unreachable("Unexpected instruction for custom inserter!");
1466
1467 case AArch64::F128CSEL:
1468 return EmitF128CSEL(MI, BB);
1469
1470 case TargetOpcode::STACKMAP:
1471 case TargetOpcode::PATCHPOINT:
1472 return emitPatchPoint(MI, BB);
1473
1474 case AArch64::CATCHRET:
1475 return EmitLoweredCatchRet(MI, BB);
1476 case AArch64::CATCHPAD:
1477 return EmitLoweredCatchPad(MI, BB);
1478 }
1479}
1480
1481//===----------------------------------------------------------------------===//
1482// AArch64 Lowering private implementation.
1483//===----------------------------------------------------------------------===//
1484
1485//===----------------------------------------------------------------------===//
1486// Lowering Code
1487//===----------------------------------------------------------------------===//
1488
1489/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1490/// CC
1491static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1492 switch (CC) {
1493 default:
1494 llvm_unreachable("Unknown condition code!");
1495 case ISD::SETNE:
1496 return AArch64CC::NE;
1497 case ISD::SETEQ:
1498 return AArch64CC::EQ;
1499 case ISD::SETGT:
1500 return AArch64CC::GT;
1501 case ISD::SETGE:
1502 return AArch64CC::GE;
1503 case ISD::SETLT:
1504 return AArch64CC::LT;
1505 case ISD::SETLE:
1506 return AArch64CC::LE;
1507 case ISD::SETUGT:
1508 return AArch64CC::HI;
1509 case ISD::SETUGE:
1510 return AArch64CC::HS;
1511 case ISD::SETULT:
1512 return AArch64CC::LO;
1513 case ISD::SETULE:
1514 return AArch64CC::LS;
1515 }
1516}
1517
1518/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1519static void changeFPCCToAArch64CC(ISD::CondCode CC,
1520 AArch64CC::CondCode &CondCode,
1521 AArch64CC::CondCode &CondCode2) {
1522 CondCode2 = AArch64CC::AL;
1523 switch (CC) {
1524 default:
1525 llvm_unreachable("Unknown FP condition!");
1526 case ISD::SETEQ:
1527 case ISD::SETOEQ:
1528 CondCode = AArch64CC::EQ;
1529 break;
1530 case ISD::SETGT:
1531 case ISD::SETOGT:
1532 CondCode = AArch64CC::GT;
1533 break;
1534 case ISD::SETGE:
1535 case ISD::SETOGE:
1536 CondCode = AArch64CC::GE;
1537 break;
1538 case ISD::SETOLT:
1539 CondCode = AArch64CC::MI;
1540 break;
1541 case ISD::SETOLE:
1542 CondCode = AArch64CC::LS;
1543 break;
1544 case ISD::SETONE:
1545 CondCode = AArch64CC::MI;
1546 CondCode2 = AArch64CC::GT;
1547 break;
1548 case ISD::SETO:
1549 CondCode = AArch64CC::VC;
1550 break;
1551 case ISD::SETUO:
1552 CondCode = AArch64CC::VS;
1553 break;
1554 case ISD::SETUEQ:
1555 CondCode = AArch64CC::EQ;
1556 CondCode2 = AArch64CC::VS;
1557 break;
1558 case ISD::SETUGT:
1559 CondCode = AArch64CC::HI;
1560 break;
1561 case ISD::SETUGE:
1562 CondCode = AArch64CC::PL;
1563 break;
1564 case ISD::SETLT:
1565 case ISD::SETULT:
1566 CondCode = AArch64CC::LT;
1567 break;
1568 case ISD::SETLE:
1569 case ISD::SETULE:
1570 CondCode = AArch64CC::LE;
1571 break;
1572 case ISD::SETNE:
1573 case ISD::SETUNE:
1574 CondCode = AArch64CC::NE;
1575 break;
1576 }
1577}
1578
1579/// Convert a DAG fp condition code to an AArch64 CC.
1580/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1581/// should be AND'ed instead of OR'ed.
1582static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1583 AArch64CC::CondCode &CondCode,
1584 AArch64CC::CondCode &CondCode2) {
1585 CondCode2 = AArch64CC::AL;
1586 switch (CC) {
1587 default:
1588 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1589 assert(CondCode2 == AArch64CC::AL);
1590 break;
1591 case ISD::SETONE:
1592 // (a one b)
1593 // == ((a olt b) || (a ogt b))
1594 // == ((a ord b) && (a une b))
1595 CondCode = AArch64CC::VC;
1596 CondCode2 = AArch64CC::NE;
1597 break;
1598 case ISD::SETUEQ:
1599 // (a ueq b)
1600 // == ((a uno b) || (a oeq b))
1601 // == ((a ule b) && (a uge b))
1602 CondCode = AArch64CC::PL;
1603 CondCode2 = AArch64CC::LE;
1604 break;
1605 }
1606}
1607
1608/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1609/// CC usable with the vector instructions. Fewer operations are available
1610/// without a real NZCV register, so we have to use less efficient combinations
1611/// to get the same effect.
1612static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1613 AArch64CC::CondCode &CondCode,
1614 AArch64CC::CondCode &CondCode2,
1615 bool &Invert) {
1616 Invert = false;
1617 switch (CC) {
1618 default:
1619 // Mostly the scalar mappings work fine.
1620 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1621 break;
1622 case ISD::SETUO:
1623 Invert = true;
1624 LLVM_FALLTHROUGH;
1625 case ISD::SETO:
1626 CondCode = AArch64CC::MI;
1627 CondCode2 = AArch64CC::GE;
1628 break;
1629 case ISD::SETUEQ:
1630 case ISD::SETULT:
1631 case ISD::SETULE:
1632 case ISD::SETUGT:
1633 case ISD::SETUGE:
1634 // All of the compare-mask comparisons are ordered, but we can switch
1635 // between the two by a double inversion. E.g. ULE == !OGT.
1636 Invert = true;
1637 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
1638 CondCode, CondCode2);
1639 break;
1640 }
1641}
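
For instance, a vector SETULE has no direct compare-mask encoding, so it is lowered through its ordered inverse. The snippet below is an illustrative sketch of how the Invert flag is meant to be consumed, not code taken from this file:

    AArch64CC::CondCode CC1, CC2;
    bool Invert;
    changeVectorFPCCToAArch64CC(ISD::SETULE, CC1, CC2, Invert);
    // CC1 is now AArch64CC::GT (the code for "a ogt b") and Invert is true;
    // the caller is expected to build the OGT compare mask and then
    // bitwise-negate it, since for vectors ULE == !OGT.
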
1642
1643static bool isLegalArithImmed(uint64_t C) {
1644 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1645 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1646 LLVM_DEBUG(dbgs() << "Is imm " << C
1647 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1649}
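
The predicate matches exactly the immediates an AArch64 ADD/SUB (and therefore CMP/CMN) can encode: an unsigned 12-bit value, optionally shifted left by 12 bits. A few spot checks, consistent with the expression above:

    // isLegalArithImmed(0xFFF)      -> true  (largest plain 12-bit immediate)
    // isLegalArithImmed(0x1000)     -> true  (1 << 12, low 12 bits clear)
    // isLegalArithImmed(0xFFF000)   -> true  (0xFFF << 12)
    // isLegalArithImmed(0x1001)     -> false (needs bits from both halves)
    // isLegalArithImmed(0x1000000)  -> false (shifted field wider than 12 bits)
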
1650
1651 // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
1652// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
1653// can be set differently by this operation. It comes down to whether
1654// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1655// everything is fine. If not then the optimization is wrong. Thus general
1656// comparisons are only valid if op2 != 0.
1657//
1658// So, finally, the only LLVM-native comparisons that don't mention C and V
1659// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1660// the absence of information about op2.
1661static bool isCMN(SDValue Op, ISD::CondCode CC) {
1662 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1663 (CC == ISD::SETEQ || CC == ISD::SETNE);
1664}
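
To make the restriction concrete, a hedged example of the rewrite that isCMN() guards; the assembly shown is typical codegen, not taken from this report:

    // For equality tests the fold is always safe:
    //   (CMP x, (sub 0, y))  with SETEQ/SETNE   ==>   CMN x, y
    // so "x == -y" typically lowers to:
    //   cmn  w0, w1
    //   cset w0, eq
    // For signed/unsigned orderings, SUBS(x, -y) and ADDS(x, y) can set the
    // C and V flags differently (the comment above singles out op2 == 0), so
    // the fold is restricted to SETEQ/SETNE.
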
1665
1666static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1667 const SDLoc &dl, SelectionDAG &DAG) {
1668 EVT VT = LHS.getValueType();
1669 const bool FullFP16 =
1670 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1671
1672 if (VT.isFloatingPoint()) {
1673 assert(VT != MVT::f128);
1674 if (VT == MVT::f16 && !FullFP16) {
1675 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1676 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1677 VT = MVT::f32;
1678 }
1679 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1680 }
1681
1682 // The CMP instruction is just an alias for SUBS, and representing it as
1683 // SUBS means that it's possible to get CSE with subtract operations.
1684 // A later phase can perform the optimization of setting the destination
1685 // register to WZR/XZR if it ends up being unused.
1686 unsigned Opcode = AArch64ISD::SUBS;
1687
1688 if (isCMN(RHS, CC)) {
1689 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
1690 Opcode = AArch64ISD::ADDS;
1691 RHS = RHS.getOperand(1);
1692 } else if (isCMN(LHS, CC)) {
1693 // As we are looking for EQ/NE compares, the operands can be commuted; can
1694 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
1695 Opcode = AArch64ISD::ADDS;
1696 LHS = LHS.getOperand(1);
1697 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1698 !isUnsignedIntSetCC(CC)) {
1699 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1700 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1701 // of the signed comparisons.
1702 Opcode = AArch64ISD::ANDS;
1703 RHS = LHS.getOperand(1);
1704 LHS = LHS.getOperand(0);
1705 }
1706
1707 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1708 .getValue(1);
1709}
1710
1711/// \defgroup AArch64CCMP CMP;CCMP matching
1712///
1713/// These functions deal with the formation of CMP;CCMP;... sequences.
1714/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1715/// a comparison. They set the NZCV flags to a predefined value if their
1716/// predicate is false. This allows us to express arbitrary conjunctions, for
1717/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1718/// expressed as:
1719/// cmp A
1720/// ccmp B, inv(CB), CA
1721/// check for CB flags
1722///
1723/// This naturally lets us implement chains of AND operations with SETCC
1724/// operands. And we can even implement some other situations by transforming
1725/// them:
1726/// - We can implement (NEG SETCC) i.e. negating a single comparison by
1727/// negating the flags used in a CCMP/FCCMP operations.
1728/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1729/// by negating the flags we test for afterwards. i.e.
1730/// NEG (CMP CCMP CCCMP ...) can be implemented.
1731/// - Note that we can only ever negate all previously processed results.
1732/// What we cannot implement by flipping the flags to test is a negation
1733/// of two sub-trees (because the negation affects all sub-trees emitted so
1734/// far, so the 2nd sub-tree we emit would also affect the first).
1735/// With those tools we can implement some OR operations:
1736/// - (OR (SETCC A) (SETCC B)) can be implemented via:
1737/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1738/// - After transforming OR to NEG/AND combinations we may be able to use NEG
1739/// elimination rules from earlier to implement the whole thing as a
1740/// CCMP/FCCMP chain.
1741///
1742/// As complete example:
1743/// or (or (setCA (cmp A)) (setCB (cmp B)))
1744/// (and (setCC (cmp C)) (setCD (cmp D)))"
1745/// can be reassociated to:
1746/// or (and (setCC (cmp C)) setCD (cmp D))
1747/// (or (setCA (cmp A)) (setCB (cmp B)))
1748/// can be transformed to:
1749/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1750/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1751/// which can be implemented as:
1752/// cmp C
1753/// ccmp D, inv(CD), CC
1754/// ccmp A, CA, inv(CD)
1755/// ccmp B, CB, inv(CA)
1756/// check for CB flags
1757///
1758/// A counterexample is "or (and A B) (and C D)" which translates to
1759/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
1760/// can only implement 1 of the inner (not) operations, but not both!
1761/// @{
1762
1763/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1764static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1765 ISD::CondCode CC, SDValue CCOp,
1766 AArch64CC::CondCode Predicate,
1767 AArch64CC::CondCode OutCC,
1768 const SDLoc &DL, SelectionDAG &DAG) {
1769 unsigned Opcode = 0;
1770 const bool FullFP16 =
1771 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1772
1773 if (LHS.getValueType().isFloatingPoint()) {
1774 assert(LHS.getValueType() != MVT::f128);
1775 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1776 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1777 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1778 }
1779 Opcode = AArch64ISD::FCCMP;
1780 } else if (RHS.getOpcode() == ISD::SUB) {
1781 SDValue SubOp0 = RHS.getOperand(0);
1782 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1783 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1784 Opcode = AArch64ISD::CCMN;
1785 RHS = RHS.getOperand(1);
1786 }
1787 }
1788 if (Opcode == 0)
1789 Opcode = AArch64ISD::CCMP;
1790
1791 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1792 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1793 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1794 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1795 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1796}
1797
1798/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
1799/// expressed as a conjunction. See \ref AArch64CCMP.
1800/// \param CanNegate Set to true if we can negate the whole sub-tree just by
1801/// changing the conditions on the SETCC tests.
1802/// (this means we can call emitConjunctionRec() with
1803/// Negate==true on this sub-tree)
1804/// \param MustBeFirst Set to true if this subtree needs to be negated and we
1805/// cannot do the negation naturally. We are required to
1806/// emit the subtree first in this case.
1807/// \param WillNegate Is true if we are called when the result of this
1808/// subexpression must be negated. This happens when the
1809/// outer expression is an OR. We can use this fact to know
1810/// that we have a double negation (or (or ...) ...) that
1811/// can be implemented for free.
1812static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1813 bool &MustBeFirst, bool WillNegate,
1814 unsigned Depth = 0) {
1815 if (!Val.hasOneUse())
1816 return false;
1817 unsigned Opcode = Val->getOpcode();
1818 if (Opcode == ISD::SETCC) {
1819 if (Val->getOperand(0).getValueType() == MVT::f128)
1820 return false;
1821 CanNegate = true;
1822 MustBeFirst = false;
1823 return true;
1824 }
1825 // Protect against exponential runtime and stack overflow.
1826 if (Depth > 6)
1827 return false;
1828 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1829 bool IsOR = Opcode == ISD::OR;
1830 SDValue O0 = Val->getOperand(0);
1831 SDValue O1 = Val->getOperand(1);
1832 bool CanNegateL;
1833 bool MustBeFirstL;
1834 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
1835 return false;
1836 bool CanNegateR;
1837 bool MustBeFirstR;
1838 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
1839 return false;
1840
1841 if (MustBeFirstL && MustBeFirstR)
1842 return false;
1843
1844 if (IsOR) {
1845 // For an OR expression we need to be able to naturally negate at least
1846 // one side or we cannot do the transformation at all.
1847 if (!CanNegateL && !CanNegateR)
1848 return false;
1849 // If the result of the OR will be negated and we can naturally negate
1850 // the leaves, then this sub-tree as a whole negates naturally.
1851 CanNegate = WillNegate && CanNegateL && CanNegateR;
1852 // If we cannot naturally negate the whole sub-tree, then this must be
1853 // emitted first.
1854 MustBeFirst = !CanNegate;
1855 } else {
1856 assert(Opcode == ISD::AND && "Must be OR or AND");
1857 // We cannot naturally negate an AND operation.
1858 CanNegate = false;
1859 MustBeFirst = MustBeFirstL || MustBeFirstR;
1860 }
1861 return true;
1862 }
1863 return false;
1864}
1865
1866/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1867/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1868/// Tries to transform the given i1 producing node @p Val to a series of compare
1869/// and conditional compare operations. @returns an NZCV flags producing node
1870/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1871/// the transformation was not possible.
1872/// \p Negate is true if we want this sub-tree to be negated just by changing
1873/// SETCC conditions.
1874static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
1875 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1876 AArch64CC::CondCode Predicate) {
1877 // We're at a tree leaf, produce a conditional comparison operation.
1878 unsigned Opcode = Val->getOpcode();
1879 if (Opcode == ISD::SETCC) {
1880 SDValue LHS = Val->getOperand(0);
1881 SDValue RHS = Val->getOperand(1);
1882 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1883 bool isInteger = LHS.getValueType().isInteger();
1884 if (Negate)
1885 CC = getSetCCInverse(CC, LHS.getValueType());
1886 SDLoc DL(Val);
1887 // Determine OutCC and handle FP special case.
1888 if (isInteger) {
1889 OutCC = changeIntCCToAArch64CC(CC);
1890 } else {
1891 assert(LHS.getValueType().isFloatingPoint());
1892 AArch64CC::CondCode ExtraCC;
1893 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1894 // Some floating point conditions can't be tested with a single condition
1895 // code. Construct an additional comparison in this case.
1896 if (ExtraCC != AArch64CC::AL) {
1897 SDValue ExtraCmp;
1898 if (!CCOp.getNode())
1899 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1900 else
1901 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1902 ExtraCC, DL, DAG);
1903 CCOp = ExtraCmp;
1904 Predicate = ExtraCC;
1905 }
1906 }
1907
1908 // Produce a normal comparison if we are first in the chain
1909 if (!CCOp)
1910 return emitComparison(LHS, RHS, CC, DL, DAG);
1911 // Otherwise produce a ccmp.
1912 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1913 DAG);
1914 }
1915 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
1916
1917 bool IsOR = Opcode == ISD::OR;
1918
1919 SDValue LHS = Val->getOperand(0);
1920 bool CanNegateL;
1921 bool MustBeFirstL;
1922 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
1923 assert(ValidL && "Valid conjunction/disjunction tree");
1924 (void)ValidL;
1925
1926 SDValue RHS = Val->getOperand(1);
1927 bool CanNegateR;
1928 bool MustBeFirstR;
1929 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
1930 assert(ValidR && "Valid conjunction/disjunction tree");
1931 (void)ValidR;
1932
1933 // Swap sub-tree that must come first to the right side.
1934 if (MustBeFirstL) {
1935 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
1936 std::swap(LHS, RHS);
1937 std::swap(CanNegateL, CanNegateR);
1938 std::swap(MustBeFirstL, MustBeFirstR);
1939 }
1940
1941 bool NegateR;
1942 bool NegateAfterR;
1943 bool NegateL;
1944 bool NegateAfterAll;
1945 if (Opcode == ISD::OR) {
1946 // Swap the sub-tree that we can negate naturally to the left.
1947 if (!CanNegateL) {
1948 assert(CanNegateR && "at least one side must be negatable");
1949 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
1950 assert(!Negate);
1951 std::swap(LHS, RHS);
1952 NegateR = false;
1953 NegateAfterR = true;
1954 } else {
1955 // Negate the left sub-tree if possible, otherwise negate the result.
1956 NegateR = CanNegateR;
1957 NegateAfterR = !CanNegateR;
1958 }
1959 NegateL = true;
1960 NegateAfterAll = !Negate;
1961 } else {
1962 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
1963 assert(!Negate && "Valid conjunction/disjunction tree");
1964
1965 NegateL = false;
1966 NegateR = false;
1967 NegateAfterR = false;
1968 NegateAfterAll = false;
1969 }
1970
1971 // Emit sub-trees.
1972 AArch64CC::CondCode RHSCC;
1973 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
1974 if (NegateAfterR)
1975 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1976 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
1977 if (NegateAfterAll)
1978 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1979 return CmpL;
1980}
1981
1982/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
1983/// In some cases this is even possible with OR operations in the expression.
1984/// See \ref AArch64CCMP.
1985/// \see emitConjunctionRec().
1986static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
1987 AArch64CC::CondCode &OutCC) {
1988 bool DummyCanNegate;
1989 bool DummyMustBeFirst;
1990 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
1991 return SDValue();
1992
1993 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
1994}
1995
1996/// @}
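
A hedged end-to-end illustration of the machinery documented in the \defgroup comment above. The C source is illustrative and the assembly is one plausible encoding, following the emission order used by emitConjunctionRec (right-hand sub-tree first); it is not output drawn from this report:

    // int f(int a, int b) { return a > 0 && b == 7; }
    //
    //   cmp  w1, #7           // flags for "b == 7"          (emitComparison)
    //   ccmp w0, #0, #4, eq   // if eq holds, compare a with 0; otherwise set
    //                         // NZCV to #4 (Z set), so the final gt test
    //                         // fails              (emitConditionalComparison)
    //   cset w0, gt           // materialize the result of the whole chain
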
1997
1998/// Returns how profitable it is to fold a comparison's operand's shift and/or
1999/// extension operations.
2000static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2001 auto isSupportedExtend = [&](SDValue V) {
2002 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2003 return true;
2004
2005 if (V.getOpcode() == ISD::AND)
2006 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2007 uint64_t Mask = MaskCst->getZExtValue();
2008 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2009 }
2010
2011 return false;
2012 };
2013
2014 if (!Op.hasOneUse())
2015 return 0;
2016
2017 if (isSupportedExtend(Op))
2018 return 1;
2019
2020 unsigned Opc = Op.getOpcode();
2021 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2022 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2023 uint64_t Shift = ShiftCst->getZExtValue();
2024 if (isSupportedExtend(Op.getOperand(0)))
2025 return (Shift <= 4) ? 2 : 1;
2026 EVT VT = Op.getValueType();
2027 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2028 return 1;
2029 }
2030
2031 return 0;
2032}
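
For reference, how the scoring above plays out on a few operand shapes; each operand is assumed to have a single use, and the values follow directly from the code:

    // sign_extend_inreg x                  -> 1  (supported extend)
    // (and x, 0xFF)                        -> 1  (zero-extend via mask)
    // shl (and x, 0xFF), 3                 -> 2  (extend + shift <= 4)
    // shl (and x, 0xFF), 7                 -> 1  (shift too large for the
    //                                             combined extend+shift form)
    // shl x, 5   (i32, shift in range)     -> 1  (plain shifted register)
    // add x, y                             -> 0  (nothing foldable)
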
2033
2034static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2035 SDValue &AArch64cc, SelectionDAG &DAG,
2036 const SDLoc &dl) {
2037 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2038 EVT VT = RHS.getValueType();
2039 uint64_t C = RHSC->getZExtValue();
2040 if (!isLegalArithImmed(C)) {
2041 // Constant does not fit, try adjusting it by one?
2042 switch (CC) {
2043 default:
2044 break;
2045 case ISD::SETLT:
2046 case ISD::SETGE:
2047 if ((VT == MVT::i32 && C != 0x80000000 &&
2048 isLegalArithImmed((uint32_t)(C - 1))) ||
2049 (VT == MVT::i64 && C != 0x80000000ULL &&
2050 isLegalArithImmed(C - 1ULL))) {
2051 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2052 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2053 RHS = DAG.getConstant(C, dl, VT);
2054 }
2055 break;
2056 case ISD::SETULT:
2057 case ISD::SETUGE:
2058 if ((VT == MVT::i32 && C != 0 &&
2059 isLegalArithImmed((uint32_t)(C - 1))) ||
2060 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2061 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2062 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2063 RHS = DAG.getConstant(C, dl, VT);
2064 }
2065 break;
2066 case ISD::SETLE:
2067 case ISD::SETGT:
2068 if ((VT == MVT::i32 && C != INT32_MAX &&
2069 isLegalArithImmed((uint32_t)(C + 1))) ||
2070 (VT == MVT::i64 && C != INT64_MAX &&
2071 isLegalArithImmed(C + 1ULL))) {
2072 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2073 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2074 RHS = DAG.getConstant(C, dl, VT);
2075 }
2076 break;
2077 case ISD::SETULE:
2078 case ISD::SETUGT:
2079 if ((VT == MVT::i32 && C != UINT32_MAX &&
2080 isLegalArithImmed((uint32_t)(C + 1))) ||
2081 (VT == MVT::i64 && C != UINT64_MAX &&
2082 isLegalArithImmed(C + 1ULL))) {
2083 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2084 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2085 RHS = DAG.getConstant(C, dl, VT);
2086 }
2087 break;
2088 }
2089 }
2090 }
2091
2092 // Comparisons are canonicalized so that the RHS operand is simpler than the
2093 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2094 // can fold some shift+extend operations on the RHS operand, so swap the
2095 // operands if that can be done.
2096 //
2097 // For example:
2098 // lsl w13, w11, #1
2099 // cmp w13, w12
2100 // can be turned into:
2101 // cmp w12, w11, lsl #1
2102 if (!isa<ConstantSDNode>(RHS) ||
2103 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2104 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2105
2106 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2107 std::swap(LHS, RHS);
2108 CC = ISD::getSetCCSwappedOperands(CC);
2109 }
2110 }
2111
2112 SDValue Cmp;
2113 AArch64CC::CondCode AArch64CC;
2114 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2115 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2116
2117 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2118 // For the i8 operand, the largest immediate is 255, so this can be easily
2119 // encoded in the compare instruction. For the i16 operand, however, the
2120 // largest immediate cannot be encoded in the compare.
2121 // Therefore, use a sign extending load and cmn to avoid materializing the
2122 // -1 constant. For example,
2123 // movz w1, #65535
2124 // ldrh w0, [x0, #0]
2125 // cmp w0, w1
2126 // >
2127 // ldrsh w0, [x0, #0]
2128 // cmn w0, #1
2129 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2130 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2131 // ensure both the LHS and RHS are truly zero extended and to make sure the
2132 // transformation is profitable.
2133 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2134 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2135 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2136 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2137 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2138 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2139 SDValue SExt =
2140 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2141 DAG.getValueType(MVT::i16));
2142 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2143 RHS.getValueType()),
2144 CC, dl, DAG);
2145 AArch64CC = changeIntCCToAArch64CC(CC);
2146 }
2147 }
2148
2149 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2150 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2151 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2152 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2153 }
2154 }
2155 }
2156
2157 if (!Cmp) {
2158 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2159 AArch64CC = changeIntCCToAArch64CC(CC);
2160 }
2161 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2162 return Cmp;
2163}
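
A worked instance of the constant adjustment at the top of getAArch64Cmp(); the example and assembly are illustrative, not taken from this report:

    // "x < 0x1001" (SETLT) cannot use 0x1001 directly: it is not a legal
    // arithmetic immediate (see isLegalArithImmed above). Rewriting it as
    // "x <= 0x1000" (SETLE) preserves the signed-integer meaning, and 0x1000
    // encodes as a shifted 12-bit immediate, so a single CMP suffices:
    //   cmp  w0, #1, lsl #12   // compare against 0x1000
    //   cset w0, le
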
2164
2165static std::pair<SDValue, SDValue>
2166getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2167 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2168 "Unsupported value type");
2169 SDValue Value, Overflow;
2170 SDLoc DL(Op);
2171 SDValue LHS = Op.getOperand(0);
2172 SDValue RHS = Op.getOperand(1);
2173 unsigned Opc = 0;
2174 switch (Op.getOpcode()) {
2175 default:
2176 llvm_unreachable("Unknown overflow instruction!");
2177 case ISD::SADDO:
2178 Opc = AArch64ISD::ADDS;
2179 CC = AArch64CC::VS;
2180 break;
2181 case ISD::UADDO:
2182 Opc = AArch64ISD::ADDS;
2183 CC = AArch64CC::HS;
2184 break;
2185 case ISD::SSUBO:
2186 Opc = AArch64ISD::SUBS;
2187 CC = AArch64CC::VS;
2188 break;
2189 case ISD::USUBO:
2190 Opc = AArch64ISD::SUBS;
2191 CC = AArch64CC::LO;
2192 break;
2193 // Multiply needs a little bit extra work.
2194 case ISD::SMULO:
2195 case ISD::UMULO: {
2196 CC = AArch64CC::NE;
2197 bool IsSigned = Op.getOpcode() == ISD::SMULO;
2198 if (Op.getValueType() == MVT::i32) {
2199 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2200 // For a 32 bit multiply with overflow check we want the instruction
2201 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2202 // need to generate the following pattern:
2203 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2204 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2205 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2206 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2207 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2208 DAG.getConstant(0, DL, MVT::i64));
2209 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2210 // operation. We need to clear out the upper 32 bits, because we used a
2211 // widening multiply that wrote all 64 bits. In the end this should be a
2212 // noop.
2213 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2214 if (IsSigned) {
2215 // The signed overflow check requires more than just a simple check for
2216 // any bit set in the upper 32 bits of the result. These bits could be
2217 // just the sign bits of a negative number. To perform the overflow
2218 // check we arithmetic-shift the low 32 bits of the result right by
2219 // 31 bits. Then we compare that to the upper 32 bits.
2220 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2221 DAG.getConstant(32, DL, MVT::i64));
2222 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2223 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2224 DAG.getConstant(31, DL, MVT::i64));
2225 // It is important that LowerBits is last, otherwise the arithmetic
2226 // shift will not be folded into the compare (SUBS).
2227 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2228 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2229 .getValue(1);
2230 } else {
2231 // The overflow check for unsigned multiply is easy. We only need to
2232 // check if any of the upper 32 bits are set. This can be done with a
2233 // CMP (shifted register). For that we need to generate the following
2234 // pattern:
2235 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2236 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2237 DAG.getConstant(32, DL, MVT::i64));
2238 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2239 Overflow =
2240 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2241 DAG.getConstant(0, DL, MVT::i64),
2242 UpperBits).getValue(1);
2243 }
2244 break;
2245 }
2246 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2247 // For the 64 bit multiply
2248 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2249 if (IsSigned) {
2250 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2251 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2252 DAG.getConstant(63, DL, MVT::i64));
2253 // It is important that LowerBits is last, otherwise the arithmetic
2254 // shift will not be folded into the compare (SUBS).
2255 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2256 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2257 .getValue(1);
2258 } else {
2259 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2260 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2261 Overflow =
2262 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2263 DAG.getConstant(0, DL, MVT::i64),
2264 UpperBits).getValue(1);
2265 }
2266 break;
2267 }
2268 } // switch (...)
2269
2270 if (Opc) {
2271 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2272
2273 // Emit the AArch64 operation with overflow check.
2274 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2275 Overflow = Value.getValue(1);
2276 }
2277 return std::make_pair(Value, Overflow);
2278}
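
The multiply cases above can be summarised behaviourally. This is a plain C++ sketch of the 32-bit overflow conditions being computed, not the DAG construction itself:

    #include <cstdint>

    // Signed 32-bit multiply overflow: widen to 64 bits; the product fits in
    // 32 bits iff the upper half equals the sign-extension of bit 31 of the
    // low half, which is what the SRL/SRA + SUBS sequence above checks.
    bool smulo32(int32_t a, int32_t b) {
      int64_t Wide = (int64_t)a * (int64_t)b;
      return Wide != (int64_t)(int32_t)Wide;
    }

    // Unsigned 32-bit multiply overflow: any bit set in the upper 32 bits.
    bool umulo32(uint32_t a, uint32_t b) {
      uint64_t Wide = (uint64_t)a * (uint64_t)b;
      return (Wide >> 32) != 0;
    }
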
2279
2280SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2281 RTLIB::Libcall Call) const {
2282 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2283 MakeLibCallOptions CallOptions;
2284 return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
2285}
2286
2287// Returns true if the given Op is the overflow flag result of an overflow
2288// intrinsic operation.
2289static bool isOverflowIntrOpRes(SDValue Op) {
2290 unsigned Opc = Op.getOpcode();
2291 return (Op.getResNo() == 1 &&
2292 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2293 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2294}
2295
2296static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2297 SDValue Sel = Op.getOperand(0);
2298 SDValue Other = Op.getOperand(1);
2299 SDLoc dl(Sel);
2300
2301 // If the operand is an overflow checking operation, invert the condition
2302 // code and kill the Not operation. I.e., transform:
2303 // (xor (overflow_op_bool, 1))
2304 // -->
2305 // (csel 1, 0, invert(cc), overflow_op_bool)
2306 // ... which later gets transformed to just a cset instruction with an
2307 // inverted condition code, rather than a cset + eor sequence.
2308 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
2309 // Only lower legal XALUO ops.
2310 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2311 return SDValue();
2312
2313 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2314 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2315 AArch64CC::CondCode CC;
2316 SDValue Value, Overflow;
2317 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2318 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2319 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2320 CCVal, Overflow);
2321 }
2322 // If neither operand is a SELECT_CC, give up.
2323 if (Sel.getOpcode() != ISD::SELECT_CC)
2324 std::swap(Sel, Other);
2325 if (Sel.getOpcode() != ISD::SELECT_CC)
2326 return Op;
2327
2328 // The folding we want to perform is:
2329 // (xor x, (select_cc a, b, cc, 0, -1) )
2330 // -->
2331 // (csel x, (xor x, -1), cc ...)
2332 //
2333 // The latter will get matched to a CSINV instruction.
2334
2335 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2336 SDValue LHS = Sel.getOperand(0);
2337 SDValue RHS = Sel.getOperand(1);
2338 SDValue TVal = Sel.getOperand(2);
2339 SDValue FVal = Sel.getOperand(3);
2340
2341 // FIXME: This could be generalized to non-integer comparisons.
2342 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2343 return Op;
2344
2345 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2346 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2347
2348 // The values aren't constants, this isn't the pattern we're looking for.
2349 if (!CFVal || !CTVal)
2350 return Op;
2351
2352 // We can commute the SELECT_CC by inverting the condition. This
2353 // might be needed to make this fit into a CSINV pattern.
2354 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2355 std::swap(TVal, FVal);
2356 std::swap(CTVal, CFVal);
2357 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2358 }
2359
2360 // If the constants line up, perform the transform!
2361 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2362 SDValue CCVal;
2363 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2364
2365 FVal = Other;
2366 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2367 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2368
2369 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2370 CCVal, Cmp);
2371 }
2372
2373 return Op;
2374}
2375
2376static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2377 EVT VT = Op.getValueType();
2378
2379 // Let legalize expand this if it isn't a legal type yet.
2380 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2381 return SDValue();
2382
2383 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2384
2385 unsigned Opc;
2386 bool ExtraOp = false;
2387 switch (Op.getOpcode()) {
2388 default:
2389 llvm_unreachable("Invalid code");
2390 case ISD::ADDC:
2391 Opc = AArch64ISD::ADDS;
2392 break;
2393 case ISD::SUBC:
2394 Opc = AArch64ISD::SUBS;
2395 break;
2396 case ISD::ADDE:
2397 Opc = AArch64ISD::ADCS;
2398 ExtraOp = true;
2399 break;
2400 case ISD::SUBE:
2401 Opc = AArch64ISD::SBCS;
2402 ExtraOp = true;
2403 break;
2404 }
2405
2406 if (!ExtraOp)
2407 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2408 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2409 Op.getOperand(2));
2410}
2411
2412static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2413 // Let legalize expand this if it isn't a legal type yet.
2414 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2415 return SDValue();
2416
2417 SDLoc dl(Op);
2418 AArch64CC::CondCode CC;
2419 // The actual operation that sets the overflow or carry flag.
2420 SDValue Value, Overflow;
2421 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2422
2423 // We use 0 and 1 as false and true values.
2424 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2425 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2426
2427 // We use an inverted condition, because the conditional select is inverted
2428 // too. This will allow it to be selected to a single instruction:
2429 // CSINC Wd, WZR, WZR, invert(cond).
2430 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2431 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2432 CCVal, Overflow);
2433
2434 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2435 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2436}
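// A minimal sketch of the selection described in the comment above, assuming
// an i32 llvm.uadd.with.overflow with operands in w1 and w2 (registers are
// hypothetical):
//   adds w0, w1, w2     // Value; sets the carry flag on unsigned overflow
//   cset w8, hs         // Overflow; cset is the CSINC Wd, WZR, WZR,
//                       // invert(cond) form mentioned above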
2437
2438// Prefetch operands are:
2439// 1: Address to prefetch
2440// 2: bool isWrite
2441// 3: int locality (0 = no locality ... 3 = extreme locality)
2442// 4: bool isDataCache
2443static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2444 SDLoc DL(Op);
2445 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2446 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2447 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2448
2449 bool IsStream = !Locality;
2450 // When the locality number is set
2451 if (Locality) {
2452 // The front-end should have filtered out the out-of-range values
2453 assert(Locality <= 3 && "Prefetch locality out-of-range");
2454 // The locality degree is the opposite of the cache speed.
2455 // Put the number the other way around.
2456 // The encoding starts at 0 for level 1
2457 Locality = 3 - Locality;
2458 }
2459
2460 // Build the mask value encoding the expected behavior.
2461 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2462 (!IsData << 3) | // IsDataCache bit
2463 (Locality << 1) | // Cache level bits
2464 (unsigned)IsStream; // Stream bit
2465 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2466 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2467}
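// Worked example of the encoding above (the values follow directly from this
// code; the source-level builtin is just one way to reach it): a read prefetch
// of the data cache with locality hint 1, e.g. __builtin_prefetch(p, 0, 1),
// arrives with IsWrite = 0, Locality = 1, IsData = 1, so IsStream = 0,
// Locality becomes 3 - 1 = 2, and
//   PrfOp = (0 << 4) | (0 << 3) | (2 << 1) | 0 = 4
// which is the PLDL3KEEP hint; locality 0 instead gives PrfOp = 1 (PLDL1STRM).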
2468
2469SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2470 SelectionDAG &DAG) const {
2471 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2472
2473 RTLIB::Libcall LC;
2474 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2475
2476 return LowerF128Call(Op, DAG, LC);
2477}
2478
2479SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2480 SelectionDAG &DAG) const {
2481 if (Op.getOperand(0).getValueType() != MVT::f128) {
2482 // It's legal except when f128 is involved
2483 return Op;
2484 }
2485
2486 RTLIB::Libcall LC;
2487 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2488
2489 // FP_ROUND node has a second operand indicating whether it is known to be
2490 // precise. That doesn't take part in the LibCall so we can't directly use
2491 // LowerF128Call.
2492 SDValue SrcVal = Op.getOperand(0);
2493 MakeLibCallOptions CallOptions;
2494 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions,
2495 SDLoc(Op)).first;
2496}
2497
2498SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
2499 SelectionDAG &DAG) const {
2500 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2501 // Any additional optimization in this function should be recorded
2502 // in the cost tables.
2503 EVT InVT = Op.getOperand(0).getValueType();
2504 EVT VT = Op.getValueType();
2505 unsigned NumElts = InVT.getVectorNumElements();
2506
2507 // f16 conversions are promoted to f32 when full fp16 is not supported.
2508 if (InVT.getVectorElementType() == MVT::f16 &&
2509 !Subtarget->hasFullFP16()) {
2510 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2511 SDLoc dl(Op);
2512 return DAG.getNode(
2513 Op.getOpcode(), dl, Op.getValueType(),
2514 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2515 }
2516
2517 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2518 SDLoc dl(Op);
2519 SDValue Cv =
2520 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2521 Op.getOperand(0));
2522 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2523 }
2524
2525 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2526 SDLoc dl(Op);
2527 MVT ExtVT =
2528 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2529 VT.getVectorNumElements());
2530 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2531 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2532 }
2533
2534 // Type changing conversions are illegal.
2535 return Op;
2536}
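// Rough illustration of the narrowing path above (the instructions shown are
// the usual selections, register names hypothetical): fptoui <2 x double> to
// <2 x i32> converts in the wide integer type first and then truncates,
//   (fp_to_uint v2f64) : v2i64  ->  fcvtzu v0.2d, v0.2d
//   (truncate v2i64)   : v2i32  ->  xtn    v0.2s, v0.2d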
2537
2538SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2539 SelectionDAG &DAG) const {
2540 if (Op.getOperand(0).getValueType().isVector())
2541 return LowerVectorFP_TO_INT(Op, DAG);
2542
2543 // f16 conversions are promoted to f32 when full fp16 is not supported.
2544 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2545 !Subtarget->hasFullFP16()) {
2546 SDLoc dl(Op);
2547 return DAG.getNode(
2548 Op.getOpcode(), dl, Op.getValueType(),
2549 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2550 }
2551
2552 if (Op.getOperand(0).getValueType() != MVT::f128) {
2553 // It's legal except when f128 is involved
2554 return Op;
2555 }
2556
2557 RTLIB::Libcall LC;
2558 if (Op.getOpcode() == ISD::FP_TO_SINT)
2559 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2560 else
2561 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2562
2563 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2564 MakeLibCallOptions CallOptions;
2565 return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first;
2566}
2567
2568static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2569 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2570 // Any additional optimization in this function should be recorded
2571 // in the cost tables.
2572 EVT VT = Op.getValueType();
2573 SDLoc dl(Op);
2574 SDValue In = Op.getOperand(0);
2575 EVT InVT = In.getValueType();
2576
2577 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2578 MVT CastVT =
2579 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2580 InVT.getVectorNumElements());
2581 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2582 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2583 }
2584
2585 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2586 unsigned CastOpc =
2587 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2588 EVT CastVT = VT.changeVectorElementTypeToInteger();
2589 In = DAG.getNode(CastOpc, dl, CastVT, In);
2590 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2591 }
2592
2593 return Op;
2594}
2595
2596SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2597 SelectionDAG &DAG) const {
2598 if (Op.getValueType().isVector())
2599 return LowerVectorINT_TO_FP(Op, DAG);
2600
2601 // f16 conversions are promoted to f32 when full fp16 is not supported.
2602 if (Op.getValueType() == MVT::f16 &&
2603 !Subtarget->hasFullFP16()) {
2604 SDLoc dl(Op);
2605 return DAG.getNode(
2606 ISD::FP_ROUND, dl, MVT::f16,
2607 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2608 DAG.getIntPtrConstant(0, dl));
2609 }
2610
2611 // i128 conversions are libcalls.
2612 if (Op.getOperand(0).getValueType() == MVT::i128)
2613 return SDValue();
2614
2615 // Other conversions are legal, unless it's to the completely software-based
2616 // fp128.
2617 if (Op.getValueType() != MVT::f128)
2618 return Op;
2619
2620 RTLIB::Libcall LC;
2621 if (Op.getOpcode() == ISD::SINT_TO_FP)
2622 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2623 else
2624 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2625
2626 return LowerF128Call(Op, DAG, LC);
2627}
2628
2629SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2630 SelectionDAG &DAG) const {
2631 // For iOS, we want to call an alternative entry point: __sincos_stret,
2632 // which returns the values in two S / D registers.
2633 SDLoc dl(Op);
2634 SDValue Arg = Op.getOperand(0);
2635 EVT ArgVT = Arg.getValueType();
2636 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2637
2638 ArgListTy Args;
2639 ArgListEntry Entry;
2640
2641 Entry.Node = Arg;
2642 Entry.Ty = ArgTy;
2643 Entry.IsSExt = false;
2644 Entry.IsZExt = false;
2645 Args.push_back(Entry);
2646
2647 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2648 : RTLIB::SINCOS_STRET_F32;
2649 const char *LibcallName = getLibcallName(LC);
2650 SDValue Callee =
2651 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2652
2653 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2654 TargetLowering::CallLoweringInfo CLI(DAG);
2655 CLI.setDebugLoc(dl)
2656 .setChain(DAG.getEntryNode())
2657 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2658
2659 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2660 return CallResult.first;
2661}
2662
2663static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2664 if (Op.getValueType() != MVT::f16)
2665 return SDValue();
2666
2667 assert(Op.getOperand(0).getValueType() == MVT::i16);
2668 SDLoc DL(Op);
2669
2670 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2671 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2672 return SDValue(
2673 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2674 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2675 0);
2676}
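// A sketch of what the i16 -> f16 bitcast above typically selects to (register
// names hypothetical): the any_extend plus i32 -> f32 bitcast becomes
//   fmov s0, w0
// and the EXTRACT_SUBREG of the hsub subregister just reinterprets the low 16
// bits of s0 as h0, so the final step needs no extra instruction.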
2677
2678static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2679 if (OrigVT.getSizeInBits() >= 64)
2680 return OrigVT;
2681
2682 assert(OrigVT.isSimple() && "Expecting a simple value type");
2683
2684 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2685 switch (OrigSimpleTy) {
2686 default: llvm_unreachable("Unexpected Vector Type");
2687 case MVT::v2i8:
2688 case MVT::v2i16:
2689 return MVT::v2i32;
2690 case MVT::v4i8:
2691 return MVT::v4i16;
2692 }
2693}
2694
2695static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2696 const EVT &OrigTy,
2697 const EVT &ExtTy,
2698 unsigned ExtOpcode) {
2699 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2700 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2701 // 64-bits we need to insert a new extension so that it will be 64-bits.
2702 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2703 if (OrigTy.getSizeInBits() >= 64)
2704 return N;
2705
2706 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2707 EVT NewVT = getExtensionTo64Bits(OrigTy);
2708
2709 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2710}
2711
2712static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2713 bool isSigned) {
2714 EVT VT = N->getValueType(0);
2715
2716 if (N->getOpcode() != ISD::BUILD_VECTOR)
2717 return false;
2718
2719 for (const SDValue &Elt : N->op_values()) {
2720 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2721 unsigned EltSize = VT.getScalarSizeInBits();
2722 unsigned HalfSize = EltSize / 2;
2723 if (isSigned) {
2724 if (!isIntN(HalfSize, C->getSExtValue()))
2725 return false;
2726 } else {
2727 if (!isUIntN(HalfSize, C->getZExtValue()))
2728 return false;
2729 }
2730 continue;
2731 }
2732 return false;
2733 }
2734
2735 return true;
2736}
2737
2738static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2739 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2740 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2741 N->getOperand(0)->getValueType(0),
2742 N->getValueType(0),
2743 N->getOpcode());
2744
2745 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2746 EVT VT = N->getValueType(0);
2747 SDLoc dl(N);
2748 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2749 unsigned NumElts = VT.getVectorNumElements();
2750 MVT TruncVT = MVT::getIntegerVT(EltSize);
2751 SmallVector<SDValue, 8> Ops;
2752 for (unsigned i = 0; i != NumElts; ++i) {
2753 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2754 const APInt &CInt = C->getAPIntValue();
2755 // Element types smaller than 32 bits are not legal, so use i32 elements.
2756 // The values are implicitly truncated so sext vs. zext doesn't matter.
2757 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2758 }
2759 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2760}
2761
2762static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2763 return N->getOpcode() == ISD::SIGN_EXTEND ||
2764 isExtendedBUILD_VECTOR(N, DAG, true);
2765}
2766
2767static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2768 return N->getOpcode() == ISD::ZERO_EXTEND ||
2769 isExtendedBUILD_VECTOR(N, DAG, false);
2770}
2771
2772static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2773 unsigned Opcode = N->getOpcode();
2774 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2775 SDNode *N0 = N->getOperand(0).getNode();
2776 SDNode *N1 = N->getOperand(1).getNode();
2777 return N0->hasOneUse() && N1->hasOneUse() &&
2778 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2779 }
2780 return false;
2781}
2782
2783static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2784 unsigned Opcode = N->getOpcode();
2785 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2786 SDNode *N0 = N->getOperand(0).getNode();
2787 SDNode *N1 = N->getOperand(1).getNode();
2788 return N0->hasOneUse() && N1->hasOneUse() &&
2789 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2790 }
2791 return false;
2792}
2793
2794SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2795 SelectionDAG &DAG) const {
2796 // The rounding mode is in bits 23:22 of the FPSCR.
2797 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2798 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
2799 // so that the shift + and get folded into a bitfield extract.
2800 SDLoc dl(Op);
2801
2802 SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
2803 DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
2804 MVT::i64));
2805 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2806 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2807 DAG.getConstant(1U << 22, dl, MVT::i32));
2808 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2809 DAG.getConstant(22, dl, MVT::i32));
2810 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2811 DAG.getConstant(3, dl, MVT::i32));
2812}
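// Worked example of the formula above: with FPCR.RMode == 0b11 (round toward
// zero) the RMode field contributes 3 << 22, so
//   ((3 << 22) + (1 << 22)) >> 22 = 4,  and  4 & 3 = 0
// which is the FLT_ROUNDS value for "toward zero"; RMode == 0b00 (to nearest)
// gives ((0 + (1 << 22)) >> 22) & 3 = 1, matching the 0->1, 1->2, 2->3, 3->0
// mapping described in the comment.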
2813
2814static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2815 // Multiplications are only custom-lowered for 128-bit vectors so that
2816 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2817 EVT VT = Op.getValueType();
2818 assert(VT.is128BitVector() && VT.isInteger() &&
2819 "unexpected type for custom-lowering ISD::MUL");
2820 SDNode *N0 = Op.getOperand(0).getNode();
2821 SDNode *N1 = Op.getOperand(1).getNode();
2822 unsigned NewOpc = 0;
2823 bool isMLA = false;
2824 bool isN0SExt = isSignExtended(N0, DAG);
2825 bool isN1SExt = isSignExtended(N1, DAG);
2826 if (isN0SExt && isN1SExt)
2827 NewOpc = AArch64ISD::SMULL;
2828 else {
2829 bool isN0ZExt = isZeroExtended(N0, DAG);
2830 bool isN1ZExt = isZeroExtended(N1, DAG);
2831 if (isN0ZExt && isN1ZExt)
2832 NewOpc = AArch64ISD::UMULL;
2833 else if (isN1SExt || isN1ZExt) {
2834 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2835 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2836 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2837 NewOpc = AArch64ISD::SMULL;
2838 isMLA = true;
2839 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2840 NewOpc = AArch64ISD::UMULL;
2841 isMLA = true;
2842 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2843 std::swap(N0, N1);
2844 NewOpc = AArch64ISD::UMULL;
2845 isMLA = true;
2846 }
2847 }
2848
2849 if (!NewOpc) {
2850 if (VT == MVT::v2i64)
2851 // Fall through to expand this. It is not legal.
2852 return SDValue();
2853 else
2854 // Other vector multiplications are legal.
2855 return Op;
2856 }
2857 }
2858
2859 // Legalize to a S/UMULL instruction
2860 SDLoc DL(Op);
2861 SDValue Op0;
2862 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2863 if (!isMLA) {
2864 Op0 = skipExtensionForVectorMULL(N0, DAG);
2865 assert(Op0.getValueType().is64BitVector() &&
2866 Op1.getValueType().is64BitVector() &&
2867 "unexpected types for extended operands to VMULL");
2868 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2869 }
2870 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2871 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
2872 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
2873 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2874 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2875 EVT Op1VT = Op1.getValueType();
2876 return DAG.getNode(N0->getOpcode(), DL, VT,
2877 DAG.getNode(NewOpc, DL, VT,
2878 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2879 DAG.getNode(NewOpc, DL, VT,
2880 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2881}
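// Two rough examples of the patterns handled above (types follow the code; the
// instructions are the usual selections): a plain widening multiply
//   (mul (zext v2i32 A to v2i64), (zext v2i32 B to v2i64))
// becomes AArch64ISD::UMULL A, B, i.e. a single "umull v0.2d, v1.2s, v2.2s",
// while the MLA-style case distributes the multiply,
//   (mul (add (zext A), (zext B)), (zext C))
//     --> (add (UMULL A, C), (UMULL B, C))
// so back-to-back umull/umlal can be used on accumulate-forwarding cores such
// as Cortex-A53/A57, as the comment above notes.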
2882
2883SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2884 SelectionDAG &DAG) const {
2885 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2886 SDLoc dl(Op);
2887 switch (IntNo) {
2888 default: return SDValue(); // Don't custom lower most intrinsics.
2889 case Intrinsic::thread_pointer: {
2890 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2891 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2892 }
2893 case Intrinsic::aarch64_neon_abs: {
2894 EVT Ty = Op.getValueType();
2895 if (Ty == MVT::i64) {
2896 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
2897 Op.getOperand(1));
2898 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
2899 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
2900 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
2901 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
2902 } else {
2903 report_fatal_error("Unexpected type for AArch64 NEON intrinic");
2904 }
2905 }
2906 case Intrinsic::aarch64_neon_smax:
2907 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2908 Op.getOperand(1), Op.getOperand(2));
2909 case Intrinsic::aarch64_neon_umax:
2910 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2911 Op.getOperand(1), Op.getOperand(2));
2912 case Intrinsic::aarch64_neon_smin:
2913 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2914 Op.getOperand(1), Op.getOperand(2));
2915 case Intrinsic::aarch64_neon_umin:
2916 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2917 Op.getOperand(1), Op.getOperand(2));
2918
2919 case Intrinsic::aarch64_sve_sunpkhi:
2920 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
2921 Op.getOperand(1));
2922 case Intrinsic::aarch64_sve_sunpklo:
2923 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
2924 Op.getOperand(1));
2925 case Intrinsic::aarch64_sve_uunpkhi:
2926 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
2927 Op.getOperand(1));
2928 case Intrinsic::aarch64_sve_uunpklo:
2929 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
2930 Op.getOperand(1));
2931 case Intrinsic::aarch64_sve_clasta_n:
2932 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
2933 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
2934 case Intrinsic::aarch64_sve_clastb_n:
2935 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
2936 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
2937 case Intrinsic::aarch64_sve_lasta:
2938 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
2939 Op.getOperand(1), Op.getOperand(2));
2940 case Intrinsic::aarch64_sve_lastb:
2941 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
2942 Op.getOperand(1), Op.getOperand(2));
2943 case Intrinsic::aarch64_sve_rev:
2944 return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
2945 Op.getOperand(1));
2946 case Intrinsic::aarch64_sve_tbl:
2947 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
2948 Op.getOperand(1), Op.getOperand(2));
2949 case Intrinsic::aarch64_sve_trn1:
2950 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
2951 Op.getOperand(1), Op.getOperand(2));
2952 case Intrinsic::aarch64_sve_trn2:
2953 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
2954 Op.getOperand(1), Op.getOperand(2));
2955 case Intrinsic::aarch64_sve_uzp1:
2956 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
2957 Op.getOperand(1), Op.getOperand(2));
2958 case Intrinsic::aarch64_sve_uzp2:
2959 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
2960 Op.getOperand(1), Op.getOperand(2));
2961 case Intrinsic::aarch64_sve_zip1:
2962 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
2963 Op.getOperand(1), Op.getOperand(2));
2964 case Intrinsic::aarch64_sve_zip2:
2965 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
2966 Op.getOperand(1), Op.getOperand(2));
2967 case Intrinsic::aarch64_sve_ptrue:
2968 return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
2969 Op.getOperand(1));
2970
2971 case Intrinsic::aarch64_sve_insr: {
2972 SDValue Scalar = Op.getOperand(2);
2973 EVT ScalarTy = Scalar.getValueType();
2974 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
2975 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
2976
2977 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
2978 Op.getOperand(1), Scalar);
2979 }
2980
2981 case Intrinsic::localaddress: {
2982 const auto &MF = DAG.getMachineFunction();
2983 const auto *RegInfo = Subtarget->getRegisterInfo();
2984 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
2985 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
2986 Op.getSimpleValueType());
2987 }
2988
2989 case Intrinsic::eh_recoverfp: {
2990 // FIXME: This needs to be implemented to correctly handle highly aligned
2991 // stack objects. For now we simply return the incoming FP. Refer D53541
2992 // for more details.
2993 SDValue FnOp = Op.getOperand(1);
2994 SDValue IncomingFPOp = Op.getOperand(2);
2995 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
2996 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
2997 if (!Fn)
2998 report_fatal_error(
2999 "llvm.eh.recoverfp must take a function as the first argument");
3000 return IncomingFPOp;
3001 }
3002 }
3003}
3004
3005bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3006 return ExtVal.getValueType().isScalableVector();
3007}
3008
3009// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
3010static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
3011 EVT VT, EVT MemVT,
3012 SelectionDAG &DAG) {
3013 assert(VT.isVector() && "VT should be a vector type");
3014 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
3015
3016 SDValue Value = ST->getValue();
3017
3018 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
3019 // the word lane which represents the v4i8 subvector. It optimizes the store
3020 // to:
3021 //
3022 // xtn v0.8b, v0.8h
3023 // str s0, [x0]
3024
3025 SDValue Undef = DAG.getUNDEF(MVT::i16);
3026 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
3027 {Undef, Undef, Undef, Undef});
3028
3029 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
3030 Value, UndefVec);
3031 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
3032
3033 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
3034 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3035 Trunc, DAG.getConstant(0, DL, MVT::i64));
3036
3037 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
3038 ST->getBasePtr(), ST->getMemOperand());
3039}
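// Step-by-step sketch of the lowering above for a truncating v4i16 -> v4i8
// store (node names as in the code; the store address is hypothetical):
//   concat_vectors v4i16 %v, v4i16 undef    -> v8i16
//   truncate                                -> v8i8   ("xtn v0.8b, v0.8h")
//   bitcast to v2i32, extract_vector_elt 0  -> i32
//   store i32                               -> "str s0, [x0]"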
3040
3041 // Custom lowering for any store, vector or scalar, truncating or not.
3042 // Currently we only custom lower truncating stores from v4i16 to v4i8 and
3043 // volatile stores of i128.
3044SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
3045 SelectionDAG &DAG) const {
3046 SDLoc Dl(Op);
3047 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
3048 assert (StoreNode && "Can only custom lower store nodes")((StoreNode && "Can only custom lower store nodes") ?
static_cast<void> (0) : __assert_fail ("StoreNode && \"Can only custom lower store nodes\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3048, __PRETTY_FUNCTION__))
;
3049
3050 SDValue Value = StoreNode->getValue();
3051
3052 EVT VT = Value.getValueType();
3053 EVT MemVT = StoreNode->getMemoryVT();
3054
3055 if (VT.isVector()) {
3056 unsigned AS = StoreNode->getAddressSpace();
3057 unsigned Align = StoreNode->getAlignment();
3058 if (Align < MemVT.getStoreSize() &&
3059 !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
3060 StoreNode->getMemOperand()->getFlags(),
3061 nullptr)) {
3062 return scalarizeVectorStore(StoreNode, DAG);
3063 }
3064
3065 if (StoreNode->isTruncatingStore()) {
3066 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
3067 }
3068 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
3069 assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
3070 SDValue Lo =
3071 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3072 DAG.getConstant(0, Dl, MVT::i64));
3073 SDValue Hi =
3074 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3075 DAG.getConstant(1, Dl, MVT::i64));
3076 SDValue Result = DAG.getMemIntrinsicNode(
3077 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
3078 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
3079 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
3080 return Result;
3081 }
3082
3083 return SDValue();
3084}
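// Sketch of the volatile i128 path above (register assignment hypothetical):
// the value is split into its two i64 halves and stored with a single STP,
//   stp x0, x1, [x2]     // x0 = low 64 bits, x1 = high 64 bits
// so the volatile access stays a single store-pair instruction instead of
// being split into two separate stores.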
3085
3086SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
3087 SelectionDAG &DAG) const {
3088 LLVM_DEBUG(dbgs() << "Custom lowering: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-lower")) { dbgs() << "Custom lowering: "; } }
while (false)
;
3089 LLVM_DEBUG(Op.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-lower")) { Op.dump(); } } while (false)
;
3090
3091 switch (Op.getOpcode()) {
3092 default:
3093 llvm_unreachable("unimplemented operand")::llvm::llvm_unreachable_internal("unimplemented operand", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3093)
;
3094 return SDValue();
3095 case ISD::BITCAST:
3096 return LowerBITCAST(Op, DAG);
3097 case ISD::GlobalAddress:
3098 return LowerGlobalAddress(Op, DAG);
3099 case ISD::GlobalTLSAddress:
3100 return LowerGlobalTLSAddress(Op, DAG);
3101 case ISD::SETCC:
3102 return LowerSETCC(Op, DAG);
3103 case ISD::BR_CC:
3104 return LowerBR_CC(Op, DAG);
3105 case ISD::SELECT:
3106 return LowerSELECT(Op, DAG);
3107 case ISD::SELECT_CC:
3108 return LowerSELECT_CC(Op, DAG);
3109 case ISD::JumpTable:
3110 return LowerJumpTable(Op, DAG);
3111 case ISD::BR_JT:
3112 return LowerBR_JT(Op, DAG);
3113 case ISD::ConstantPool:
3114 return LowerConstantPool(Op, DAG);
3115 case ISD::BlockAddress:
3116 return LowerBlockAddress(Op, DAG);
3117 case ISD::VASTART:
3118 return LowerVASTART(Op, DAG);
3119 case ISD::VACOPY:
3120 return LowerVACOPY(Op, DAG);
3121 case ISD::VAARG:
3122 return LowerVAARG(Op, DAG);
3123 case ISD::ADDC:
3124 case ISD::ADDE:
3125 case ISD::SUBC:
3126 case ISD::SUBE:
3127 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
3128 case ISD::SADDO:
3129 case ISD::UADDO:
3130 case ISD::SSUBO:
3131 case ISD::USUBO:
3132 case ISD::SMULO:
3133 case ISD::UMULO:
3134 return LowerXALUO(Op, DAG);
3135 case ISD::FADD:
3136 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
3137 case ISD::FSUB:
3138 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
3139 case ISD::FMUL:
3140 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
3141 case ISD::FDIV:
3142 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
3143 case ISD::FP_ROUND:
3144 return LowerFP_ROUND(Op, DAG);
3145 case ISD::FP_EXTEND:
3146 return LowerFP_EXTEND(Op, DAG);
3147 case ISD::FRAMEADDR:
3148 return LowerFRAMEADDR(Op, DAG);
3149 case ISD::SPONENTRY:
3150 return LowerSPONENTRY(Op, DAG);
3151 case ISD::RETURNADDR:
3152 return LowerRETURNADDR(Op, DAG);
3153 case ISD::ADDROFRETURNADDR:
3154 return LowerADDROFRETURNADDR(Op, DAG);
3155 case ISD::INSERT_VECTOR_ELT:
3156 return LowerINSERT_VECTOR_ELT(Op, DAG);
3157 case ISD::EXTRACT_VECTOR_ELT:
3158 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3159 case ISD::BUILD_VECTOR:
3160 return LowerBUILD_VECTOR(Op, DAG);
3161 case ISD::VECTOR_SHUFFLE:
3162 return LowerVECTOR_SHUFFLE(Op, DAG);
3163 case ISD::SPLAT_VECTOR:
3164 return LowerSPLAT_VECTOR(Op, DAG);
3165 case ISD::EXTRACT_SUBVECTOR:
3166 return LowerEXTRACT_SUBVECTOR(Op, DAG);
3167 case ISD::SRA:
3168 case ISD::SRL:
3169 case ISD::SHL:
3170 return LowerVectorSRA_SRL_SHL(Op, DAG);
3171 case ISD::SHL_PARTS:
3172 return LowerShiftLeftParts(Op, DAG);
3173 case ISD::SRL_PARTS:
3174 case ISD::SRA_PARTS:
3175 return LowerShiftRightParts(Op, DAG);
3176 case ISD::CTPOP:
3177 return LowerCTPOP(Op, DAG);
3178 case ISD::FCOPYSIGN:
3179 return LowerFCOPYSIGN(Op, DAG);
3180 case ISD::OR:
3181 return LowerVectorOR(Op, DAG);
3182 case ISD::XOR:
3183 return LowerXOR(Op, DAG);
3184 case ISD::PREFETCH:
3185 return LowerPREFETCH(Op, DAG);
3186 case ISD::SINT_TO_FP:
3187 case ISD::UINT_TO_FP:
3188 return LowerINT_TO_FP(Op, DAG);
3189 case ISD::FP_TO_SINT:
3190 case ISD::FP_TO_UINT:
3191 return LowerFP_TO_INT(Op, DAG);
3192 case ISD::FSINCOS:
3193 return LowerFSINCOS(Op, DAG);
3194 case ISD::FLT_ROUNDS_:
3195 return LowerFLT_ROUNDS_(Op, DAG);
3196 case ISD::MUL:
3197 return LowerMUL(Op, DAG);
3198 case ISD::INTRINSIC_WO_CHAIN:
3199 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3200 case ISD::STORE:
3201 return LowerSTORE(Op, DAG);
3202 case ISD::VECREDUCE_ADD:
3203 case ISD::VECREDUCE_SMAX:
3204 case ISD::VECREDUCE_SMIN:
3205 case ISD::VECREDUCE_UMAX:
3206 case ISD::VECREDUCE_UMIN:
3207 case ISD::VECREDUCE_FMAX:
3208 case ISD::VECREDUCE_FMIN:
3209 return LowerVECREDUCE(Op, DAG);
3210 case ISD::ATOMIC_LOAD_SUB:
3211 return LowerATOMIC_LOAD_SUB(Op, DAG);
3212 case ISD::ATOMIC_LOAD_AND:
3213 return LowerATOMIC_LOAD_AND(Op, DAG);
3214 case ISD::DYNAMIC_STACKALLOC:
3215 return LowerDYNAMIC_STACKALLOC(Op, DAG);
3216 }
3217}
3218
3219//===----------------------------------------------------------------------===//
3220// Calling Convention Implementation
3221//===----------------------------------------------------------------------===//
3222
3223/// Selects the correct CCAssignFn for a given CallingConvention value.
3224CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
3225 bool IsVarArg) const {
3226 switch (CC) {
3227 default:
3228 report_fatal_error("Unsupported calling convention.");
3229 case CallingConv::AArch64_SVE_VectorCall:
3230 // Calling SVE functions is currently not yet supported.
3231 report_fatal_error("Unsupported calling convention.");
3232 case CallingConv::WebKit_JS:
3233 return CC_AArch64_WebKit_JS;
3234 case CallingConv::GHC:
3235 return CC_AArch64_GHC;
3236 case CallingConv::C:
3237 case CallingConv::Fast:
3238 case CallingConv::PreserveMost:
3239 case CallingConv::CXX_FAST_TLS:
3240 case CallingConv::Swift:
3241 if (Subtarget->isTargetWindows() && IsVarArg)
3242 return CC_AArch64_Win64_VarArg;
3243 if (!Subtarget->isTargetDarwin())
3244 return CC_AArch64_AAPCS;
3245 if (!IsVarArg)
3246 return CC_AArch64_DarwinPCS;
3247 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
3248 : CC_AArch64_DarwinPCS_VarArg;
3249 case CallingConv::Win64:
3250 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
3251 case CallingConv::CFGuard_Check:
3252 return CC_AArch64_Win64_CFGuard_Check;
3253 case CallingConv::AArch64_VectorCall:
3254 return CC_AArch64_AAPCS;
3255 }
3256}
3257
3258CCAssignFn *
3259AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
3260 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3261 : RetCC_AArch64_AAPCS;
3262}
3263
3264SDValue AArch64TargetLowering::LowerFormalArguments(
3265 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3266 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3267 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3268 MachineFunction &MF = DAG.getMachineFunction();
3269 MachineFrameInfo &MFI = MF.getFrameInfo();
3270 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3271
3272 // Assign locations to all of the incoming arguments.
3273 SmallVector<CCValAssign, 16> ArgLocs;
3274 DenseMap<unsigned, SDValue> CopiedRegs;
3275 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3276 *DAG.getContext());
3277
3278 // At this point, Ins[].VT may already be promoted to i32. To correctly
3279 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3280 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3281 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
3282 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
3283 // LocVT.
3284 unsigned NumArgs = Ins.size();
3285 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3286 unsigned CurArgIdx = 0;
3287 for (unsigned i = 0; i != NumArgs; ++i) {
3288 MVT ValVT = Ins[i].VT;
3289 if (Ins[i].isOrigArg()) {
3290 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
3291 CurArgIdx = Ins[i].getOrigArgIndex();
3292
3293 // Get type of the original argument.
3294 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3295 /*AllowUnknown*/ true);
3296 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3297 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3298 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3299 ValVT = MVT::i8;
3300 else if (ActualMVT == MVT::i16)
3301 ValVT = MVT::i16;
3302 }
3303 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3304 bool Res =
3305 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3306 assert(!Res && "Call operand has unhandled type")((!Res && "Call operand has unhandled type") ? static_cast
<void> (0) : __assert_fail ("!Res && \"Call operand has unhandled type\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3306, __PRETTY_FUNCTION__))
;
3307 (void)Res;
3308 }
3309 assert(ArgLocs.size() == Ins.size());
3310 SmallVector<SDValue, 16> ArgValues;
3311 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3312 CCValAssign &VA = ArgLocs[i];
3313
3314 if (Ins[i].Flags.isByVal()) {
3315 // Byval is used for HFAs in the PCS, but the system should work in a
3316 // non-compliant manner for larger structs.
3317 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3318 int Size = Ins[i].Flags.getByValSize();
3319 unsigned NumRegs = (Size + 7) / 8;
3320
3321 // FIXME: This works on big-endian for composite byvals, which are the common
3322 // case. It should also work for fundamental types.
3323 unsigned FrameIdx =
3324 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3325 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3326 InVals.push_back(FrameIdxN);
3327
3328 continue;
3329 }
3330
3331 SDValue ArgValue;
3332 if (VA.isRegLoc()) {
3333 // Arguments stored in registers.
3334 EVT RegVT = VA.getLocVT();
3335 const TargetRegisterClass *RC;
3336
3337 if (RegVT == MVT::i32)
3338 RC = &AArch64::GPR32RegClass;
3339 else if (RegVT == MVT::i64)
3340 RC = &AArch64::GPR64RegClass;
3341 else if (RegVT == MVT::f16)
3342 RC = &AArch64::FPR16RegClass;
3343 else if (RegVT == MVT::f32)
3344 RC = &AArch64::FPR32RegClass;
3345 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3346 RC = &AArch64::FPR64RegClass;
3347 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3348 RC = &AArch64::FPR128RegClass;
3349 else if (RegVT.isScalableVector() &&
3350 RegVT.getVectorElementType() == MVT::i1)
3351 RC = &AArch64::PPRRegClass;
3352 else if (RegVT.isScalableVector())
3353 RC = &AArch64::ZPRRegClass;
3354 else
3355 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3355)
;
3356
3357 // Transform the arguments in physical registers into virtual ones.
3358 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3359 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3360
3361 // If this is an 8, 16 or 32-bit value, it is really passed promoted
3362 // to 64 bits. Insert an assert[sz]ext to capture this, then
3363 // truncate to the right size.
3364 switch (VA.getLocInfo()) {
3365 default:
3366 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3366)
;
3367 case CCValAssign::Full:
3368 break;
3369 case CCValAssign::Indirect:
3370 assert(VA.getValVT().isScalableVector() &&
3371 "Only scalable vectors can be passed indirectly");
3372 llvm_unreachable("Spilling of SVE vectors not yet implemented")::llvm::llvm_unreachable_internal("Spilling of SVE vectors not yet implemented"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3372)
;
3373 case CCValAssign::BCvt:
3374 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3375 break;
3376 case CCValAssign::AExt:
3377 case CCValAssign::SExt:
3378 case CCValAssign::ZExt:
3379 break;
3380 case CCValAssign::AExtUpper:
3381 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
3382 DAG.getConstant(32, DL, RegVT));
3383 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
3384 break;
3385 }
3386 } else { // VA.isRegLoc()
3387 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem")((VA.isMemLoc() && "CCValAssign is neither reg nor mem"
) ? static_cast<void> (0) : __assert_fail ("VA.isMemLoc() && \"CCValAssign is neither reg nor mem\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3387, __PRETTY_FUNCTION__))
;
3388 unsigned ArgOffset = VA.getLocMemOffset();
3389 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
3390
3391 uint32_t BEAlign = 0;
3392 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3393 !Ins[i].Flags.isInConsecutiveRegs())
3394 BEAlign = 8 - ArgSize;
3395
3396 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3397
3398 // Create load nodes to retrieve arguments from the stack.
3399 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3400
3401 // For NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
3402 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3403 MVT MemVT = VA.getValVT();
3404
3405 switch (VA.getLocInfo()) {
3406 default:
3407 break;
3408 case CCValAssign::Trunc:
3409 case CCValAssign::BCvt:
3410 MemVT = VA.getLocVT();
3411 break;
3412 case CCValAssign::Indirect:
3413 assert(VA.getValVT().isScalableVector() &&
3414 "Only scalable vectors can be passed indirectly");
3415 llvm_unreachable("Spilling of SVE vectors not yet implemented")::llvm::llvm_unreachable_internal("Spilling of SVE vectors not yet implemented"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3415)
;
3416 case CCValAssign::SExt:
3417 ExtType = ISD::SEXTLOAD;
3418 break;
3419 case CCValAssign::ZExt:
3420 ExtType = ISD::ZEXTLOAD;
3421 break;
3422 case CCValAssign::AExt:
3423 ExtType = ISD::EXTLOAD;
3424 break;
3425 }
3426
3427 ArgValue = DAG.getExtLoad(
3428 ExtType, DL, VA.getLocVT(), Chain, FIN,
3429 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3430 MemVT);
3431
3432 }
3433 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
3434 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
3435 ArgValue, DAG.getValueType(MVT::i32));
3436 InVals.push_back(ArgValue);
3437 }
3438
3439 // varargs
3440 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3441 if (isVarArg) {
3442 if (!Subtarget->isTargetDarwin() || IsWin64) {
3443 // The AAPCS variadic function ABI is identical to the non-variadic
3444 // one. As a result there may be more arguments in registers and we should
3445 // save them for future reference.
3446 // Win64 variadic functions also pass arguments in registers, but all float
3447 // arguments are passed in integer registers.
3448 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3449 }
3450
3451 // This will point to the next argument passed via stack.
3452 unsigned StackOffset = CCInfo.getNextStackOffset();
3453 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
3454 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
3455 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3456
3457 if (MFI.hasMustTailInVarArgFunc()) {
3458 SmallVector<MVT, 2> RegParmTypes;
3459 RegParmTypes.push_back(MVT::i64);
3460 RegParmTypes.push_back(MVT::f128);
3461 // Compute the set of forwarded registers. The rest are scratch.
3462 SmallVectorImpl<ForwardedRegister> &Forwards =
3463 FuncInfo->getForwardedMustTailRegParms();
3464 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
3465 CC_AArch64_AAPCS);
3466
3467 // Conservatively forward X8, since it might be used for aggregate return.
3468 if (!CCInfo.isAllocated(AArch64::X8)) {
3469 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
3470 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
3471 }
3472 }
3473 }
3474
3475 // On Windows, InReg pointers must be returned, so record the pointer in a
3476 // virtual register at the start of the function so it can be returned in the
3477 // epilogue.
3478 if (IsWin64) {
3479 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3480 if (Ins[I].Flags.isInReg()) {
3481 assert(!FuncInfo->getSRetReturnReg());
3482
3483 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3484 Register Reg =
3485 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3486 FuncInfo->setSRetReturnReg(Reg);
3487
3488 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
3489 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
3490 break;
3491 }
3492 }
3493 }
3494
3495 unsigned StackArgSize = CCInfo.getNextStackOffset();
3496 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3497 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3498 // This is a non-standard ABI so by fiat I say we're allowed to make full
3499 // use of the stack area to be popped, which must be aligned to 16 bytes in
3500 // any case:
3501 StackArgSize = alignTo(StackArgSize, 16);
3502
3503 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3504 // a multiple of 16.
3505 FuncInfo->setArgumentStackToRestore(StackArgSize);
3506
3507 // This realignment carries over to the available bytes below. Our own
3508 // callers will guarantee the space is free by giving an aligned value to
3509 // CALLSEQ_START.
3510 }
3511 // Even if we're not expected to free up the space, it's useful to know how
3512 // much is there while considering tail calls (because we can reuse it).
3513 FuncInfo->setBytesInStackArgArea(StackArgSize);
3514
3515 if (Subtarget->hasCustomCallingConv())
3516 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3517
3518 return Chain;
3519}
3520
3521void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3522 SelectionDAG &DAG,
3523 const SDLoc &DL,
3524 SDValue &Chain) const {
3525 MachineFunction &MF = DAG.getMachineFunction();
3526 MachineFrameInfo &MFI = MF.getFrameInfo();
3527 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3528 auto PtrVT = getPointerTy(DAG.getDataLayout());
3529 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3530
3531 SmallVector<SDValue, 8> MemOps;
3532
3533 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3534 AArch64::X3, AArch64::X4, AArch64::X5,
3535 AArch64::X6, AArch64::X7 };
3536 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3537 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3538
3539 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3540 int GPRIdx = 0;
3541 if (GPRSaveSize != 0) {
3542 if (IsWin64) {
3543 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3544 if (GPRSaveSize & 15)
3545 // The extra size here, if triggered, will always be 8.
3546 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3547 } else
3548 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3549
3550 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3551
3552 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3553 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3554 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3555 SDValue Store = DAG.getStore(
3556 Val.getValue(1), DL, Val, FIN,
3557 IsWin64
3558 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3559 GPRIdx,
3560 (i - FirstVariadicGPR) * 8)
3561 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3562 MemOps.push_back(Store);
3563 FIN =
3564 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3565 }
3566 }
3567 FuncInfo->setVarArgsGPRIndex(GPRIdx);
3568 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3569
3570 if (Subtarget->hasFPARMv8() && !IsWin64) {
3571 static const MCPhysReg FPRArgRegs[] = {
3572 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3573 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3574 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3575 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3576
3577 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3578 int FPRIdx = 0;
3579 if (FPRSaveSize != 0) {
3580 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3581
3582 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3583
3584 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3585 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3586 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3587
3588 SDValue Store = DAG.getStore(
3589 Val.getValue(1), DL, Val, FIN,
3590 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3591 MemOps.push_back(Store);
3592 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3593 DAG.getConstant(16, DL, PtrVT));
3594 }
3595 }
3596 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3597 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3598 }
3599
3600 if (!MemOps.empty()) {
3601 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3602 }
3603}
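// Worked example of the register-save area above (AAPCS, non-Win64): for a
// variadic callee such as "int f(int a, int b, int c, ...)", the three named
// arguments occupy X0-X2, so FirstVariadicGPR == 3 and
//   GPRSaveSize = 8 * (8 - 3) = 40 bytes
// are allocated to spill X3-X7; when FP is available, the FPR area in this
// function is sized the same way in 16-byte Q-register slots.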
3604
3605/// LowerCallResult - Lower the result values of a call into the
3606/// appropriate copies out of appropriate physical registers.
3607SDValue AArch64TargetLowering::LowerCallResult(
3608 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3609 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3610 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3611 SDValue ThisVal) const {
3612 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3613 ? RetCC_AArch64_WebKit_JS
3614 : RetCC_AArch64_AAPCS;
3615 // Assign locations to each value returned by this call.
3616 SmallVector<CCValAssign, 16> RVLocs;
3617 DenseMap<unsigned, SDValue> CopiedRegs;
3618 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3619 *DAG.getContext());
3620 CCInfo.AnalyzeCallResult(Ins, RetCC);
3621
3622 // Copy all of the result registers out of their specified physreg.
3623 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3624 CCValAssign VA = RVLocs[i];
3625
3626 // Pass 'this' value directly from the argument to return value, to avoid
3627 // reg unit interference
3628 if (i == 0 && isThisReturn) {
3629 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3630 "unexpected return calling convention register assignment");
3631 InVals.push_back(ThisVal);
3632 continue;
3633 }
3634
3635 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
3636 // allows one use of a physreg per block.
3637 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
3638 if (!Val) {
3639 Val =
3640 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3641 Chain = Val.getValue(1);
3642 InFlag = Val.getValue(2);
3643 CopiedRegs[VA.getLocReg()] = Val;
3644 }
3645
3646 switch (VA.getLocInfo()) {
3647 default:
3648 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3648)
;
3649 case CCValAssign::Full:
3650 break;
3651 case CCValAssign::BCvt:
3652 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3653 break;
3654 case CCValAssign::AExtUpper:
3655 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
3656 DAG.getConstant(32, DL, VA.getLocVT()));
3657 LLVM_FALLTHROUGH;
3658 case CCValAssign::AExt:
3659 LLVM_FALLTHROUGH;
3660 case CCValAssign::ZExt:
3661 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
3662 break;
3663 }
3664
3665 InVals.push_back(Val);
3666 }
3667
3668 return Chain;
3669}
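
// A minimal standalone sketch (not part of this file) of the [N x i32]
// packing handled by the AExtUpper cases above: two i32 parts share one
// 64-bit location, the upper part living in bits 63:32 (stored with a 32-bit
// left shift, recovered with the SRL-by-32 seen in LowerCallResult). All
// names below are illustrative.
//
// #include <cassert>
// #include <cstdint>
//
// static uint64_t packI32Parts(uint32_t Lo, uint32_t Hi) {
//   return uint64_t(Lo) | (uint64_t(Hi) << 32); // SHL by 32, then OR
// }
//
// int main() {
//   uint64_t Reg = packI32Parts(0x11111111u, 0x22222222u);
//   uint32_t Upper = uint32_t(Reg >> 32); // the AExtUpper SRL above
//   assert(Upper == 0x22222222u);
//   return 0;
// }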
3670
3671/// Return true if the calling convention is one that we can guarantee TCO for.
3672static bool canGuaranteeTCO(CallingConv::ID CC) {
3673 return CC == CallingConv::Fast;
3674}
3675
3676/// Return true if we might ever do TCO for calls with this calling convention.
3677static bool mayTailCallThisCC(CallingConv::ID CC) {
3678 switch (CC) {
3679 case CallingConv::C:
3680 case CallingConv::PreserveMost:
3681 case CallingConv::Swift:
3682 return true;
3683 default:
3684 return canGuaranteeTCO(CC);
3685 }
3686}
3687
3688bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3689 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3690 const SmallVectorImpl<ISD::OutputArg> &Outs,
3691 const SmallVectorImpl<SDValue> &OutVals,
3692 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3693 if (!mayTailCallThisCC(CalleeCC))
3694 return false;
3695
3696 MachineFunction &MF = DAG.getMachineFunction();
3697 const Function &CallerF = MF.getFunction();
3698 CallingConv::ID CallerCC = CallerF.getCallingConv();
3699 bool CCMatch = CallerCC == CalleeCC;
3700
3701 // Byval parameters hand the function a pointer directly into the stack area
3702 // we want to reuse during a tail call. Working around this *is* possible (see
3703 // X86) but less efficient and uglier in LowerCall.
3704 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3705 e = CallerF.arg_end();
3706 i != e; ++i) {
3707 if (i->hasByValAttr())
3708 return false;
3709
3710 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
3711 // In this case, it is necessary to save/restore X0 in the callee. Tail
3712 // call opt interferes with this. So we disable tail call opt when the
3713 // caller has an argument with "inreg" attribute.
3714
3715 // FIXME: Check whether the callee also has an "inreg" argument.
3716 if (i->hasInRegAttr())
3717 return false;
3718 }
3719
3720 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3721 return canGuaranteeTCO(CalleeCC) && CCMatch;
3722
3723 // Externally-defined functions with weak linkage should not be
3724 // tail-called on AArch64 when the OS does not support dynamic
3725 // pre-emption of symbols, as the AAELF spec requires normal calls
3726 // to undefined weak functions to be replaced with a NOP or jump to the
3727 // next instruction. The behaviour of branch instructions in this
3728 // situation (as used for tail calls) is implementation-defined, so we
3729 // cannot rely on the linker replacing the tail call with a return.
3730 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3731 const GlobalValue *GV = G->getGlobal();
3732 const Triple &TT = getTargetMachine().getTargetTriple();
3733 if (GV->hasExternalWeakLinkage() &&
3734 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3735 return false;
3736 }
3737
3738 // Now we search for cases where we can use a tail call without changing the
3739 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3740 // concept.
3741
3742 // I want anyone implementing a new calling convention to think long and hard
3743 // about this assert.
3744  assert((!isVarArg || CalleeCC == CallingConv::C) &&
3745         "Unexpected variadic calling convention");
3746
3747 LLVMContext &C = *DAG.getContext();
3748 if (isVarArg && !Outs.empty()) {
3749 // At least two cases here: if caller is fastcc then we can't have any
3750 // memory arguments (we'd be expected to clean up the stack afterwards). If
3751 // caller is C then we could potentially use its argument area.
3752
3753 // FIXME: for now we take the most conservative of these in both cases:
3754 // disallow all variadic memory operands.
3755 SmallVector<CCValAssign, 16> ArgLocs;
3756 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3757
3758 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3759 for (const CCValAssign &ArgLoc : ArgLocs)
3760 if (!ArgLoc.isRegLoc())
3761 return false;
3762 }
3763
3764 // Check that the call results are passed in the same way.
3765 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3766 CCAssignFnForCall(CalleeCC, isVarArg),
3767 CCAssignFnForCall(CallerCC, isVarArg)))
3768 return false;
3769 // The callee has to preserve all registers the caller needs to preserve.
3770 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3771 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3772 if (!CCMatch) {
3773 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3774 if (Subtarget->hasCustomCallingConv()) {
3775 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3776 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3777 }
3778 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3779 return false;
3780 }
3781
3782 // Nothing more to check if the callee is taking no arguments
3783 if (Outs.empty())
3784 return true;
3785
3786 SmallVector<CCValAssign, 16> ArgLocs;
3787 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3788
3789 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3790
3791 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3792
3793 // If the stack arguments for this call do not fit into our own save area then
3794 // the call cannot be made tail.
3795 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3796 return false;
3797
3798 const MachineRegisterInfo &MRI = MF.getRegInfo();
3799 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3800 return false;
3801
3802 return true;
3803}
3804
3805SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3806 SelectionDAG &DAG,
3807 MachineFrameInfo &MFI,
3808 int ClobberedFI) const {
3809 SmallVector<SDValue, 8> ArgChains;
3810 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3811 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3812
3813 // Include the original chain at the beginning of the list. When this is
3814 // used by target LowerCall hooks, this helps legalize find the
3815 // CALLSEQ_BEGIN node.
3816 ArgChains.push_back(Chain);
3817
3818  // Add a chain value for each incoming stack-argument load whose bytes
      // overlap the clobbered slot.
3819 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3820 UE = DAG.getEntryNode().getNode()->use_end();
3821 U != UE; ++U)
3822 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3823 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3824 if (FI->getIndex() < 0) {
3825 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3826 int64_t InLastByte = InFirstByte;
3827 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3828
3829 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3830 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3831 ArgChains.push_back(SDValue(L, 1));
3832 }
3833
3834 // Build a tokenfactor for all the chains.
3835 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3836}
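
// A minimal standalone sketch (not part of this file) of the byte-range
// overlap test addTokenForArgument uses to decide whether an incoming
// stack-argument load must be chained before the store that will clobber
// that slot. Names are illustrative.
//
// #include <cassert>
// #include <cstdint>
//
// // True when [FirstA, LastA] and [FirstB, LastB] share at least one byte.
// static bool bytesOverlap(int64_t FirstA, int64_t LastA, int64_t FirstB,
//                          int64_t LastB) {
//   return (FirstB <= FirstA && FirstA <= LastB) ||
//          (FirstA <= FirstB && FirstB <= LastA);
// }
//
// int main() {
//   assert(bytesOverlap(0, 7, 4, 11));  // slots sharing bytes 4..7
//   assert(!bytesOverlap(0, 7, 8, 15)); // adjacent 8-byte slots do not overlap
//   return 0;
// }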
3837
3838bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3839 bool TailCallOpt) const {
3840 return CallCC == CallingConv::Fast && TailCallOpt;
3841}
3842
3843/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3844/// and add input and output parameter nodes.
3845SDValue
3846AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3847 SmallVectorImpl<SDValue> &InVals) const {
3848 SelectionDAG &DAG = CLI.DAG;
3849 SDLoc &DL = CLI.DL;
3850 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3851 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3852 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3853 SDValue Chain = CLI.Chain;
3854 SDValue Callee = CLI.Callee;
3855 bool &IsTailCall = CLI.IsTailCall;
3856 CallingConv::ID CallConv = CLI.CallConv;
3857 bool IsVarArg = CLI.IsVarArg;
3858
3859 MachineFunction &MF = DAG.getMachineFunction();
3860 MachineFunction::CallSiteInfo CSInfo;
3861 bool IsThisReturn = false;
3862
3863 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3864 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3865 bool IsSibCall = false;
3866
3867 if (IsTailCall) {
3868 // Check if it's really possible to do a tail call.
3869 IsTailCall = isEligibleForTailCallOptimization(
3870 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3871 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3872 report_fatal_error("failed to perform tail call elimination on a call "
3873 "site marked musttail");
3874
3875 // A sibling call is one where we're under the usual C ABI and not planning
3876 // to change that but can still do a tail call:
3877 if (!TailCallOpt && IsTailCall)
3878 IsSibCall = true;
3879
3880 if (IsTailCall)
3881 ++NumTailCalls;
3882 }
3883
3884 // Analyze operands of the call, assigning locations to each operand.
3885 SmallVector<CCValAssign, 16> ArgLocs;
3886 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3887 *DAG.getContext());
3888
3889 if (IsVarArg) {
3890 // Handle fixed and variable vector arguments differently.
3891 // Variable vector arguments always go into memory.
3892 unsigned NumArgs = Outs.size();
3893
3894 for (unsigned i = 0; i != NumArgs; ++i) {
3895 MVT ArgVT = Outs[i].VT;
3896 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3897 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3898 /*IsVarArg=*/ !Outs[i].IsFixed);
3899 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3900      assert(!Res && "Call operand has unhandled type");
3901 (void)Res;
3902 }
3903 } else {
3904 // At this point, Outs[].VT may already be promoted to i32. To correctly
3905 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3906 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3907 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3908 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3909 // LocVT.
3910 unsigned NumArgs = Outs.size();
3911 for (unsigned i = 0; i != NumArgs; ++i) {
3912 MVT ValVT = Outs[i].VT;
3913 // Get type of the original argument.
3914 EVT ActualVT = getValueType(DAG.getDataLayout(),
3915 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3916 /*AllowUnknown*/ true);
3917 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3918 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3919 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3920 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3921 ValVT = MVT::i8;
3922 else if (ActualMVT == MVT::i16)
3923 ValVT = MVT::i16;
3924
3925 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3926 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3927      assert(!Res && "Call operand has unhandled type");
3928 (void)Res;
3929 }
3930 }
3931
3932 // Get a count of how many bytes are to be pushed on the stack.
3933 unsigned NumBytes = CCInfo.getNextStackOffset();
3934
3935 if (IsSibCall) {
3936 // Since we're not changing the ABI to make this a tail call, the memory
3937 // operands are already available in the caller's incoming argument space.
3938 NumBytes = 0;
3939 }
3940
3941 // FPDiff is the byte offset of the call's argument area from the callee's.
3942 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3943 // by this amount for a tail call. In a sibling call it must be 0 because the
3944 // caller will deallocate the entire stack and the callee still expects its
3945 // arguments to begin at SP+0. Completely unused for non-tail calls.
3946 int FPDiff = 0;
3947
3948 if (IsTailCall && !IsSibCall) {
3949 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3950
3951 // Since callee will pop argument stack as a tail call, we must keep the
3952 // popped size 16-byte aligned.
3953 NumBytes = alignTo(NumBytes, 16);
3954
3955 // FPDiff will be negative if this tail call requires more space than we
3956 // would automatically have in our incoming argument space. Positive if we
3957 // can actually shrink the stack.
3958 FPDiff = NumReusableBytes - NumBytes;
3959
3960 // The stack pointer must be 16-byte aligned at all times it's used for a
3961 // memory operation, which in practice means at *all* times and in
3962 // particular across call boundaries. Therefore our own arguments started at
3963 // a 16-byte aligned SP and the delta applied for the tail call should
3964 // satisfy the same constraint.
3965    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3966 }
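    // Worked example (illustrative, not part of the original source): with
    // NumReusableBytes == 32 and a callee needing 40 bytes of arguments,
    // alignTo(40, 16) gives NumBytes == 48, so FPDiff == 32 - 48 == -16: the
    // tail call needs 16 more bytes than the incoming argument area provides,
    // and the delta stays a multiple of 16 as the assert above requires.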
3967
3968 // Adjust the stack pointer for the new arguments...
3969 // These operations are automatically eliminated by the prolog/epilog pass
3970 if (!IsSibCall)
3971 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3972
3973 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3974 getPointerTy(DAG.getDataLayout()));
3975
3976 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3977 SmallSet<unsigned, 8> RegsUsed;
3978 SmallVector<SDValue, 8> MemOpChains;
3979 auto PtrVT = getPointerTy(DAG.getDataLayout());
3980
3981 if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
3982 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
3983 for (const auto &F : Forwards) {
3984 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
3985 RegsToPass.emplace_back(F.PReg, Val);
3986 }
3987 }
3988
3989 // Walk the register/memloc assignments, inserting copies/loads.
3990 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3991 CCValAssign &VA = ArgLocs[i];
3992 SDValue Arg = OutVals[i];
3993 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3994
3995 // Promote the value if needed.
3996 switch (VA.getLocInfo()) {
3997 default:
3998      llvm_unreachable("Unknown loc info!");
3999 case CCValAssign::Full:
4000 break;
4001 case CCValAssign::SExt:
4002 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
4003 break;
4004 case CCValAssign::ZExt:
4005 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4006 break;
4007 case CCValAssign::AExt:
4008 if (Outs[i].ArgVT == MVT::i1) {
4009 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
4010 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4011 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
4012 }
4013 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4014 break;
4015 case CCValAssign::AExtUpper:
4016      assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4017 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4018 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4019 DAG.getConstant(32, DL, VA.getLocVT()));
4020 break;
4021 case CCValAssign::BCvt:
4022 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
4023 break;
4024 case CCValAssign::Trunc:
4025 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4026 break;
4027 case CCValAssign::FPExt:
4028 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
4029 break;
4030 case CCValAssign::Indirect:
4031      assert(VA.getValVT().isScalableVector() &&
4032             "Only scalable vectors can be passed indirectly");
4033      llvm_unreachable("Spilling of SVE vectors not yet implemented");
4034 }
4035
4036 if (VA.isRegLoc()) {
4037 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
4038 Outs[0].VT == MVT::i64) {
4039        assert(VA.getLocVT() == MVT::i64 &&
4040               "unexpected calling convention register assignment");
4041        assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
4042               "unexpected use of 'returned'");
4043 IsThisReturn = true;
4044 }
4045 if (RegsUsed.count(VA.getLocReg())) {
4046 // If this register has already been used then we're trying to pack
4047 // parts of an [N x i32] into an X-register. The extension type will
4048 // take care of putting the two halves in the right place but we have to
4049 // combine them.
4050 SDValue &Bits =
4051 std::find_if(RegsToPass.begin(), RegsToPass.end(),
4052 [=](const std::pair<unsigned, SDValue> &Elt) {
4053 return Elt.first == VA.getLocReg();
4054 })
4055 ->second;
4056 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4057 // Call site info is used for function's parameter entry value
4058 // tracking. For now we track only simple cases when parameter
4059 // is transferred through whole register.
4060 CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(),
4061 [&VA](MachineFunction::ArgRegPair ArgReg) {
4062 return ArgReg.Reg == VA.getLocReg();
4063 }),
4064 CSInfo.end());
4065 } else {
4066 RegsToPass.emplace_back(VA.getLocReg(), Arg);
4067 RegsUsed.insert(VA.getLocReg());
4068 const TargetOptions &Options = DAG.getTarget().Options;
4069 if (Options.EnableDebugEntryValues)
4070 CSInfo.emplace_back(VA.getLocReg(), i);
4071 }
4072 } else {
4073      assert(VA.isMemLoc());
4074
4075 SDValue DstAddr;
4076 MachinePointerInfo DstInfo;
4077
4078 // FIXME: This works on big-endian for composite byvals, which are the
4079 // common case. It should also work for fundamental types too.
4080 uint32_t BEAlign = 0;
4081 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
4082 : VA.getValVT().getSizeInBits();
4083 OpSize = (OpSize + 7) / 8;
4084 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
4085 !Flags.isInConsecutiveRegs()) {
4086 if (OpSize < 8)
4087 BEAlign = 8 - OpSize;
4088 }
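        // Worked example (illustrative): a 2-byte argument has OpSize == 2,
        // so BEAlign == 6 and the store lands in the last two bytes of the
        // 8-byte slot, i.e. the least-significant end on a big-endian target.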
4089 unsigned LocMemOffset = VA.getLocMemOffset();
4090 int32_t Offset = LocMemOffset + BEAlign;
4091 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4092 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4093
4094 if (IsTailCall) {
4095 Offset = Offset + FPDiff;
4096 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4097
4098 DstAddr = DAG.getFrameIndex(FI, PtrVT);
4099 DstInfo =
4100 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
4101
4102 // Make sure any stack arguments overlapping with where we're storing
4103 // are loaded before this eventual operation. Otherwise they'll be
4104 // clobbered.
4105 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
4106 } else {
4107 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4108
4109 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4110 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
4111 LocMemOffset);
4112 }
4113
4114 if (Outs[i].Flags.isByVal()) {
4115 SDValue SizeNode =
4116 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
4117 SDValue Cpy = DAG.getMemcpy(
4118 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
4119 /*isVol = */ false, /*AlwaysInline = */ false,
4120 /*isTailCall = */ false,
4121 DstInfo, MachinePointerInfo());
4122
4123 MemOpChains.push_back(Cpy);
4124 } else {
4125 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
4126 // promoted to a legal register type i32, we should truncate Arg back to
4127 // i1/i8/i16.
4128 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
4129 VA.getValVT() == MVT::i16)
4130 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
4131
4132 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
4133 MemOpChains.push_back(Store);
4134 }
4135 }
4136 }
4137
4138 if (!MemOpChains.empty())
4139 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4140
4141 // Build a sequence of copy-to-reg nodes chained together with token chain
4142 // and flag operands which copy the outgoing args into the appropriate regs.
4143 SDValue InFlag;
4144 for (auto &RegToPass : RegsToPass) {
4145 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
4146 RegToPass.second, InFlag);
4147 InFlag = Chain.getValue(1);
4148 }
4149
4150 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
4151 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
4152 // node so that legalize doesn't hack it.
4153 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4154 auto GV = G->getGlobal();
4155 unsigned OpFlags =
4156 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
4157 if (OpFlags & AArch64II::MO_GOT) {
4158 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
4159 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4160 } else {
4161 const GlobalValue *GV = G->getGlobal();
4162 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
4163 }
4164 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4165 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4166 Subtarget->isTargetMachO()) {
4167 const char *Sym = S->getSymbol();
4168 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
4169 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4170 } else {
4171 const char *Sym = S->getSymbol();
4172 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
4173 }
4174 }
4175
4176 // We don't usually want to end the call-sequence here because we would tidy
4177 // the frame up *after* the call, however in the ABI-changing tail-call case
4178 // we've carefully laid out the parameters so that when sp is reset they'll be
4179 // in the correct location.
4180 if (IsTailCall && !IsSibCall) {
4181 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4182 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
4183 InFlag = Chain.getValue(1);
4184 }
4185
4186 std::vector<SDValue> Ops;
4187 Ops.push_back(Chain);
4188 Ops.push_back(Callee);
4189
4190 if (IsTailCall) {
4191 // Each tail call may have to adjust the stack by a different amount, so
4192 // this information must travel along with the operation for eventual
4193 // consumption by emitEpilogue.
4194 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
4195 }
4196
4197 // Add argument registers to the end of the list so that they are known live
4198 // into the call.
4199 for (auto &RegToPass : RegsToPass)
4200 Ops.push_back(DAG.getRegister(RegToPass.first,
4201 RegToPass.second.getValueType()));
4202
4203 // Check callee args/returns for SVE registers and set calling convention
4204 // accordingly.
4205 if (CallConv == CallingConv::C) {
4206 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
4207 return Out.VT.isScalableVector();
4208 });
4209 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
4210 return In.VT.isScalableVector();
4211 });
4212
4213 if (CalleeInSVE || CalleeOutSVE)
4214 CallConv = CallingConv::AArch64_SVE_VectorCall;
4215 }
4216
4217 // Add a register mask operand representing the call-preserved registers.
4218 const uint32_t *Mask;
4219 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4220 if (IsThisReturn) {
4221 // For 'this' returns, use the X0-preserving mask if applicable
4222 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
4223 if (!Mask) {
4224 IsThisReturn = false;
4225 Mask = TRI->getCallPreservedMask(MF, CallConv);
4226 }
4227 } else
4228 Mask = TRI->getCallPreservedMask(MF, CallConv);
4229
4230 if (Subtarget->hasCustomCallingConv())
4231 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
4232
4233 if (TRI->isAnyArgRegReserved(MF))
4234 TRI->emitReservedArgRegCallError(MF);
4235
4236  assert(Mask && "Missing call preserved mask for calling convention");
4237 Ops.push_back(DAG.getRegisterMask(Mask));
4238
4239 if (InFlag.getNode())
4240 Ops.push_back(InFlag);
4241
4242 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4243
4244  // If we're doing a tail call, use a TC_RETURN here rather than an
4245 // actual call instruction.
4246 if (IsTailCall) {
4247 MF.getFrameInfo().setHasTailCall();
4248 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
4249 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4250 return Ret;
4251 }
4252
4253 // Returns a chain and a flag for retval copy to use.
4254 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
4255 InFlag = Chain.getValue(1);
4256 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4257
4258 uint64_t CalleePopBytes =
4259 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
4260
4261 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4262 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
4263 InFlag, DL);
4264 if (!Ins.empty())
4265 InFlag = Chain.getValue(1);
4266
4267 // Handle result values, copying them out of physregs into vregs that we
4268 // return.
4269 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
4270 InVals, IsThisReturn,
4271 IsThisReturn ? OutVals[0] : SDValue());
4272}
4273
4274bool AArch64TargetLowering::CanLowerReturn(
4275 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
4276 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4277 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4278 ? RetCC_AArch64_WebKit_JS
4279 : RetCC_AArch64_AAPCS;
4280 SmallVector<CCValAssign, 16> RVLocs;
4281 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
4282 return CCInfo.CheckReturn(Outs, RetCC);
4283}
4284
4285SDValue
4286AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
4287 bool isVarArg,
4288 const SmallVectorImpl<ISD::OutputArg> &Outs,
4289 const SmallVectorImpl<SDValue> &OutVals,
4290 const SDLoc &DL, SelectionDAG &DAG) const {
4291 auto &MF = DAG.getMachineFunction();
4292 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4293
4294 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4295 ? RetCC_AArch64_WebKit_JS
4296 : RetCC_AArch64_AAPCS;
4297 SmallVector<CCValAssign, 16> RVLocs;
4298 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4299 *DAG.getContext());
4300 CCInfo.AnalyzeReturn(Outs, RetCC);
4301
4302 // Copy the result values into the output registers.
4303 SDValue Flag;
4304 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
4305 SmallSet<unsigned, 4> RegsUsed;
4306 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
4307 ++i, ++realRVLocIdx) {
4308 CCValAssign &VA = RVLocs[i];
4309    assert(VA.isRegLoc() && "Can only return in registers!");
4310 SDValue Arg = OutVals[realRVLocIdx];
4311
4312 switch (VA.getLocInfo()) {
4313 default:
4314 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4314)
;
4315 case CCValAssign::Full:
4316 if (Outs[i].ArgVT == MVT::i1) {
4317 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
4318 // value. This is strictly redundant on Darwin (which uses "zeroext
4319 // i1"), but will be optimised out before ISel.
4320 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4321 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4322 }
4323 break;
4324 case CCValAssign::BCvt:
4325 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
4326 break;
4327 case CCValAssign::AExt:
4328 case CCValAssign::ZExt:
4329 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4330 break;
4331 case CCValAssign::AExtUpper:
4332      assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4333 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4334 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4335 DAG.getConstant(32, DL, VA.getLocVT()));
4336 break;
4337 }
4338
4339 if (RegsUsed.count(VA.getLocReg())) {
4340 SDValue &Bits =
4341 std::find_if(RetVals.begin(), RetVals.end(),
4342 [=](const std::pair<unsigned, SDValue> &Elt) {
4343 return Elt.first == VA.getLocReg();
4344 })
4345 ->second;
4346 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4347 } else {
4348 RetVals.emplace_back(VA.getLocReg(), Arg);
4349 RegsUsed.insert(VA.getLocReg());
4350 }
4351 }
4352
4353 SmallVector<SDValue, 4> RetOps(1, Chain);
4354 for (auto &RetVal : RetVals) {
4355 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
4356 Flag = Chain.getValue(1);
4357 RetOps.push_back(
4358 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
4359 }
4360
4361 // Windows AArch64 ABIs require that for returning structs by value we copy
4362 // the sret argument into X0 for the return.
4363 // We saved the argument into a virtual register in the entry block,
4364 // so now we copy the value out and into X0.
4365 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
4366 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
4367 getPointerTy(MF.getDataLayout()));
4368
4369 unsigned RetValReg = AArch64::X0;
4370 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
4371 Flag = Chain.getValue(1);
4372
4373 RetOps.push_back(
4374 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
4375 }
4376
4377 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4378 const MCPhysReg *I =
4379 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
4380 if (I) {
4381 for (; *I; ++I) {
4382 if (AArch64::GPR64RegClass.contains(*I))
4383 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
4384 else if (AArch64::FPR64RegClass.contains(*I))
4385 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
4386 else
4387        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
4388 }
4389 }
4390
4391 RetOps[0] = Chain; // Update chain.
4392
4393 // Add the flag if we have it.
4394 if (Flag.getNode())
4395 RetOps.push_back(Flag);
4396
4397 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
4398}
4399
4400//===----------------------------------------------------------------------===//
4401// Other Lowering Code
4402//===----------------------------------------------------------------------===//
4403
4404SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
4405 SelectionDAG &DAG,
4406 unsigned Flag) const {
4407 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
4408 N->getOffset(), Flag);
4409}
4410
4411SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
4412 SelectionDAG &DAG,
4413 unsigned Flag) const {
4414 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
4415}
4416
4417SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
4418 SelectionDAG &DAG,
4419 unsigned Flag) const {
4420 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
4421 N->getOffset(), Flag);
4422}
4423
4424SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
4425 SelectionDAG &DAG,
4426 unsigned Flag) const {
4427 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
4428}
4429
4430// (loadGOT sym)
4431template <class NodeTy>
4432SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
4433 unsigned Flags) const {
4434  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
4435 SDLoc DL(N);
4436 EVT Ty = getPointerTy(DAG.getDataLayout());
4437 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
4438 // FIXME: Once remat is capable of dealing with instructions with register
4439 // operands, expand this into two nodes instead of using a wrapper node.
4440 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
4441}
4442
4443// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
4444template <class NodeTy>
4445SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
4446 unsigned Flags) const {
4447  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4448 SDLoc DL(N);
4449 EVT Ty = getPointerTy(DAG.getDataLayout());
4450 const unsigned char MO_NC = AArch64II::MO_NC;
4451 return DAG.getNode(
4452 AArch64ISD::WrapperLarge, DL, Ty,
4453 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4454 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4455 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4456 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4457}
4458
4459// (addlow (adrp %hi(sym)) %lo(sym))
4460template <class NodeTy>
4461SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4462 unsigned Flags) const {
4463  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4464 SDLoc DL(N);
4465 EVT Ty = getPointerTy(DAG.getDataLayout());
4466 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4467 SDValue Lo = getTargetNode(N, Ty, DAG,
4468 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4469 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4470 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4471}
4472
4473// (adr sym)
4474template <class NodeTy>
4475SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
4476 unsigned Flags) const {
4477  LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4478 SDLoc DL(N);
4479 EVT Ty = getPointerTy(DAG.getDataLayout());
4480 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4481 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4482}
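
// A minimal standalone sketch (not part of this file) of the MO_PAGE /
// MO_PAGEOFF split used by getAddr above: ADRP materialises the 4 KiB page of
// the symbol relative to the current page, and the :lo12: offset is added
// separately by ADDlow. The addresses below are made up for illustration.
//
// #include <cassert>
// #include <cstdint>
//
// int main() {
//   uint64_t PC = 0x400123, Sym = 0x513456;
//   uint64_t PageDelta = (Sym & ~uint64_t(0xFFF)) - (PC & ~uint64_t(0xFFF));
//   uint64_t Lo12 = Sym & 0xFFF;
//   // ADRP yields (page of PC) + PageDelta; the ADD of :lo12: restores the rest.
//   assert((PC & ~uint64_t(0xFFF)) + PageDelta + Lo12 == Sym);
//   return 0;
// }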
4483
4484SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4485 SelectionDAG &DAG) const {
4486 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4487 const GlobalValue *GV = GN->getGlobal();
4488 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4489
4490 if (OpFlags != AArch64II::MO_NO_FLAG)
4491    assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4492           "unexpected offset in global node");
4493
4494 // This also catches the large code model case for Darwin, and tiny code
4495 // model with got relocations.
4496 if ((OpFlags & AArch64II::MO_GOT) != 0) {
4497 return getGOT(GN, DAG, OpFlags);
4498 }
4499
4500 SDValue Result;
4501 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4502 Result = getAddrLarge(GN, DAG, OpFlags);
4503 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4504 Result = getAddrTiny(GN, DAG, OpFlags);
4505 } else {
4506 Result = getAddr(GN, DAG, OpFlags);
4507 }
4508 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4509 SDLoc DL(GN);
4510 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
4511 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4512 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4513 return Result;
4514}
4515
4516/// Convert a TLS address reference into the correct sequence of loads
4517/// and calls to compute the variable's address (for Darwin, currently) and
4518/// return an SDValue containing the final node.
4519
4520/// Darwin only has one TLS scheme which must be capable of dealing with the
4521/// fully general situation, in the worst case. This means:
4522/// + "extern __thread" declaration.
4523/// + Defined in a possibly unknown dynamic library.
4524///
4525/// The general system is that each __thread variable has a [3 x i64] descriptor
4526/// which contains information used by the runtime to calculate the address. The
4527/// only part of this the compiler needs to know about is the first xword, which
4528/// contains a function pointer that must be called with the address of the
4529/// entire descriptor in "x0".
4530///
4531/// Since this descriptor may be in a different unit, in general even the
4532/// descriptor must be accessed via an indirect load. The "ideal" code sequence
4533/// is:
4534/// adrp x0, _var@TLVPPAGE
4535/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
4536/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
4537/// ; the function pointer
4538/// blr x1 ; Uses descriptor address in x0
4539/// ; Address of _var is now in x0.
4540///
4541/// If the address of _var's descriptor *is* known to the linker, then it can
4542/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4543/// a slight efficiency gain.
4544SDValue
4545AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4546 SelectionDAG &DAG) const {
4547  assert(Subtarget->isTargetDarwin() &&
4548         "This function expects a Darwin target");
4549
4550 SDLoc DL(Op);
4551 MVT PtrVT = getPointerTy(DAG.getDataLayout());
4552 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
4553 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4554
4555 SDValue TLVPAddr =
4556 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4557 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4558
4559 // The first entry in the descriptor is a function pointer that we must call
4560 // to obtain the address of the variable.
4561 SDValue Chain = DAG.getEntryNode();
4562 SDValue FuncTLVGet = DAG.getLoad(
4563 PtrMemVT, DL, Chain, DescAddr,
4564 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4565 /* Alignment = */ PtrMemVT.getSizeInBits() / 8,
4566 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
4567 Chain = FuncTLVGet.getValue(1);
4568
4569 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
4570 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
4571
4572 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4573 MFI.setAdjustsStack(true);
4574
4575 // TLS calls preserve all registers except those that absolutely must be
4576 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4577 // silly).
4578 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4579 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4580 if (Subtarget->hasCustomCallingConv())
4581 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4582
4583 // Finally, we can make the call. This is just a degenerate version of a
4584 // normal AArch64 call node: x0 takes the address of the descriptor, and
4585 // returns the address of the variable in this thread.
4586 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4587 Chain =
4588 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4589 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4590 DAG.getRegisterMask(Mask), Chain.getValue(1));
4591 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4592}
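
// A minimal sketch (not part of this file) of the [3 x i64] descriptor shape
// described in the comment above LowerDarwinGlobalTLSAddress. The struct and
// field names are illustrative; only the first xword, a resolver pointer that
// is called with the descriptor's own address in x0, matters to the compiler.
//
// #include <cstdint>
//
// struct TLVDescriptorSketch {
//   void *(*Resolver)(TLVDescriptorSketch *); // 1st xword: the function pointer
//   uint64_t Key;                             // remaining xwords are
//   uint64_t Offset;                          // runtime-private
// };
//
// // The lowered call amounts to: void *Addr = Desc->Resolver(Desc);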
4593
4594/// When accessing thread-local variables under either the general-dynamic or
4595/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4596/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4597/// is a function pointer to carry out the resolution.
4598///
4599/// The sequence is:
4600/// adrp x0, :tlsdesc:var
4601/// ldr x1, [x0, #:tlsdesc_lo12:var]
4602/// add x0, x0, #:tlsdesc_lo12:var
4603/// .tlsdesccall var
4604/// blr x1
4605/// (TPIDR_EL0 offset now in x0)
4606///
4607/// The above sequence must be produced unscheduled, to enable the linker to
4608/// optimize/relax this sequence.
4609/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4610/// above sequence, and expanded really late in the compilation flow, to ensure
4611/// the sequence is produced as per above.
4612SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4613 const SDLoc &DL,
4614 SelectionDAG &DAG) const {
4615 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4616
4617 SDValue Chain = DAG.getEntryNode();
4618 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4619
4620 Chain =
4621 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4622 SDValue Glue = Chain.getValue(1);
4623
4624 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4625}
4626
4627SDValue
4628AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4629 SelectionDAG &DAG) const {
4630  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4631 if (getTargetMachine().getCodeModel() == CodeModel::Large)
4632 report_fatal_error("ELF TLS only supported in small memory model");
4633 // Different choices can be made for the maximum size of the TLS area for a
4634 // module. For the small address model, the default TLS size is 16MiB and the
4635 // maximum TLS size is 4GiB.
4636 // FIXME: add -mtls-size command line option and make it control the 16MiB
4637 // vs. 4GiB code sequence generation.
4638 // FIXME: add tiny codemodel support. We currently generate the same code as
4639 // small, which may be larger than needed.
4640 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4641
4642 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4643
4644 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4645 if (Model == TLSModel::LocalDynamic)
4646 Model = TLSModel::GeneralDynamic;
4647 }
4648
4649 SDValue TPOff;
4650 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4651 SDLoc DL(Op);
4652 const GlobalValue *GV = GA->getGlobal();
4653
4654 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4655
4656 if (Model == TLSModel::LocalExec) {
4657 SDValue HiVar = DAG.getTargetGlobalAddress(
4658 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4659 SDValue LoVar = DAG.getTargetGlobalAddress(
4660 GV, DL, PtrVT, 0,
4661 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4662
4663 SDValue TPWithOff_lo =
4664 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4665 HiVar,
4666 DAG.getTargetConstant(0, DL, MVT::i32)),
4667 0);
4668 SDValue TPWithOff =
4669 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
4670 LoVar,
4671 DAG.getTargetConstant(0, DL, MVT::i32)),
4672 0);
4673 return TPWithOff;
4674 } else if (Model == TLSModel::InitialExec) {
4675 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4676 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4677 } else if (Model == TLSModel::LocalDynamic) {
4678 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
4679 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
4680 // the beginning of the module's TLS region, followed by a DTPREL offset
4681 // calculation.
4682
4683 // These accesses will need deduplicating if there's more than one.
4684 AArch64FunctionInfo *MFI =
4685 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4686 MFI->incNumLocalDynamicTLSAccesses();
4687
4688 // The call needs a relocation too for linker relaxation. It doesn't make
4689 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4690 // the address.
4691 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4692 AArch64II::MO_TLS);
4693
4694 // Now we can calculate the offset from TPIDR_EL0 to this module's
4695 // thread-local area.
4696 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4697
4698 // Now use :dtprel_whatever: operations to calculate this variable's offset
4699 // in its thread-storage area.
4700 SDValue HiVar = DAG.getTargetGlobalAddress(
4701 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4702 SDValue LoVar = DAG.getTargetGlobalAddress(
4703 GV, DL, MVT::i64, 0,
4704 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4705
4706 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4707 DAG.getTargetConstant(0, DL, MVT::i32)),
4708 0);
4709 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4710 DAG.getTargetConstant(0, DL, MVT::i32)),
4711 0);
4712 } else if (Model == TLSModel::GeneralDynamic) {
4713 // The call needs a relocation too for linker relaxation. It doesn't make
4714 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4715 // the address.
4716 SDValue SymAddr =
4717 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4718
4719 // Finally we can make a call to calculate the offset from tpidr_el0.
4720 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4721 } else
4722    llvm_unreachable("Unsupported ELF TLS access model");
4723
4724 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4725}
4726
4727SDValue
4728AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
4729 SelectionDAG &DAG) const {
4730  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
4731
4732 SDValue Chain = DAG.getEntryNode();
4733 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4734 SDLoc DL(Op);
4735
4736 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
4737
4738 // Load the ThreadLocalStoragePointer from the TEB
4739 // A pointer to the TLS array is located at offset 0x58 from the TEB.
4740 SDValue TLSArray =
4741 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
4742 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
4743 Chain = TLSArray.getValue(1);
4744
4745 // Load the TLS index from the C runtime;
4746 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4747 // This also does the same as LOADgot, but using a generic i32 load,
4748 // while LOADgot only loads i64.
4749 SDValue TLSIndexHi =
4750 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4751 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4752 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4753 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4754 SDValue TLSIndex =
4755 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4756 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4757 Chain = TLSIndex.getValue(1);
4758
4759 // The pointer to the thread's TLS data area is at the TLS Index scaled by 8
4760 // offset into the TLSArray.
4761 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4762 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4763 DAG.getConstant(3, DL, PtrVT));
4764 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4765 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4766 MachinePointerInfo());
4767 Chain = TLS.getValue(1);
4768
4769 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4770 const GlobalValue *GV = GA->getGlobal();
4771 SDValue TGAHi = DAG.getTargetGlobalAddress(
4772 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4773 SDValue TGALo = DAG.getTargetGlobalAddress(
4774 GV, DL, PtrVT, 0,
4775 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4776
4777 // Add the offset from the start of the .tls section (section base).
4778 SDValue Addr =
4779 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4780 DAG.getTargetConstant(0, DL, MVT::i32)),
4781 0);
4782 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4783 return Addr;
4784}
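
// A minimal standalone sketch (not part of this file) of the Windows TLS
// address arithmetic lowered above, written in plain pointer terms. The
// function name and parameters are illustrative.
//
// #include <cstdint>
//
// static char *windowsTLSAddressSketch(char *TEB, uint32_t TLSIndex,
//                                      uint64_t SectionOffset) {
//   // A pointer to the per-thread TLS array lives at TEB + 0x58.
//   char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x58);
//   // Index the array by _tls_index (8-byte slots, hence the SHL by 3 above).
//   char *TLSBase = TLSArray[TLSIndex];
//   // Add the variable's offset from the start of the module's .tls section.
//   return TLSBase + SectionOffset;
// }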
4785
4786SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
4787 SelectionDAG &DAG) const {
4788 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4789 if (DAG.getTarget().useEmulatedTLS())
4790 return LowerToTLSEmulatedModel(GA, DAG);
4791
4792 if (Subtarget->isTargetDarwin())
4793 return LowerDarwinGlobalTLSAddress(Op, DAG);
4794 if (Subtarget->isTargetELF())
4795 return LowerELFGlobalTLSAddress(Op, DAG);
4796 if (Subtarget->isTargetWindows())
4797 return LowerWindowsGlobalTLSAddress(Op, DAG);
4798
4799  llvm_unreachable("Unexpected platform trying to use TLS");
4800}
4801
4802SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4803 SDValue Chain = Op.getOperand(0);
4804 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4805 SDValue LHS = Op.getOperand(2);
4806 SDValue RHS = Op.getOperand(3);
4807 SDValue Dest = Op.getOperand(4);
4808 SDLoc dl(Op);
4809
4810 MachineFunction &MF = DAG.getMachineFunction();
4811 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
4812 // will not be produced, as they are conditional branch instructions that do
4813 // not set flags.
4814 bool ProduceNonFlagSettingCondBr =
4815 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
4816
4817 // Handle f128 first, since lowering it will result in comparing the return
4818 // value of a libcall against zero, which is just what the rest of LowerBR_CC
4819 // is expecting to deal with.
4820 if (LHS.getValueType() == MVT::f128) {
4821 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
4822
4823 // If softenSetCCOperands returned a scalar, we need to compare the result
4824 // against zero to select between true and false values.
4825 if (!RHS.getNode()) {
4826 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4827 CC = ISD::SETNE;
4828 }
4829 }
4830
4831 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4832 // instruction.
4833 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4834 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4835 // Only lower legal XALUO ops.
4836 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4837 return SDValue();
4838
4839 // The actual operation with overflow check.
4840 AArch64CC::CondCode OFCC;
4841 SDValue Value, Overflow;
4842 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4843
4844 if (CC == ISD::SETNE)
4845 OFCC = getInvertedCondCode(OFCC);
4846 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4847
4848 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4849 Overflow);
4850 }
4851
4852 if (LHS.getValueType().isInteger()) {
4853    assert((LHS.getValueType() == RHS.getValueType()) &&
4854           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4855
4856 // If the RHS of the comparison is zero, we can potentially fold this
4857 // to a specialized branch.
4858 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4859 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
4860 if (CC == ISD::SETEQ) {
4861 // See if we can use a TBZ to fold in an AND as well.
4862 // TBZ has a smaller branch displacement than CBZ. If the offset is
4863 // out of bounds, a late MI-layer pass rewrites branches.
4864 // 403.gcc is an example that hits this case.
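        // For example, (brcond (seteq (and x, 8), 0), dest) folds to
        // "TBZ x, #3, dest", testing only bit 3 of x.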
4865 if (LHS.getOpcode() == ISD::AND &&
4866 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4867 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4868 SDValue Test = LHS.getOperand(0);
4869 uint64_t Mask = LHS.getConstantOperandVal(1);
4870 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4871 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4872 Dest);
4873 }
4874
4875 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4876 } else if (CC == ISD::SETNE) {
4877 // See if we can use a TBZ to fold in an AND as well.
4878 // TBZ has a smaller branch displacement than CBZ. If the offset is
4879 // out of bounds, a late MI-layer pass rewrites branches.
4880 // 403.gcc is an example that hits this case.
4881 if (LHS.getOpcode() == ISD::AND &&
4882 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4883 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4884 SDValue Test = LHS.getOperand(0);
4885 uint64_t Mask = LHS.getConstantOperandVal(1);
4886 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4887 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4888 Dest);
4889 }
4890
4891 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4892 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4893 // Don't combine AND since emitComparison converts the AND to an ANDS
4894 // (a.k.a. TST) and the test in the test bit and branch instruction
4895 // becomes redundant. This would also increase register pressure.
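        // For example, a 64-bit "x < 0" branch becomes "TBNZ x, #63, dest",
        // since bit 63 is the sign bit (Mask = 64 - 1 below).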
4896 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4897 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4898 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4899 }
4900 }
4901 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4902 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
4903 // Don't combine AND since emitComparison converts the AND to an ANDS
4904 // (a.k.a. TST) and the test in the test bit and branch instruction
4905 // becomes redundant. This would also increase register pressure.
4906 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4907 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4908 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4909 }
4910
4911 SDValue CCVal;
4912 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4913 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4914 Cmp);
4915 }
4916
4917  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4918         LHS.getValueType() == MVT::f64);
4919
4920 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4921 // clean. Some of them require two branches to implement.
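  // For example, an ordered-and-unequal (SETONE) compare has no single AArch64
  // condition code, so it is emitted as two conditional branches whose
  // conditions are effectively OR'ed together.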
4922 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4923 AArch64CC::CondCode CC1, CC2;
4924 changeFPCCToAArch64CC(CC, CC1, CC2);
4925 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4926 SDValue BR1 =
4927 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4928 if (CC2 != AArch64CC::AL) {
4929 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4930 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4931 Cmp);
4932 }
4933
4934 return BR1;
4935}
4936
4937SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4938 SelectionDAG &DAG) const {
4939 EVT VT = Op.getValueType();
4940 SDLoc DL(Op);
4941
4942 SDValue In1 = Op.getOperand(0);
4943 SDValue In2 = Op.getOperand(1);
4944 EVT SrcVT = In2.getValueType();
4945
4946 if (SrcVT.bitsLT(VT))
4947 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4948 else if (SrcVT.bitsGT(VT))
4949 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4950
4951 EVT VecVT;
4952 uint64_t EltMask;
4953 SDValue VecVal1, VecVal2;
4954
4955 auto setVecVal = [&] (int Idx) {
4956 if (!VT.isVector()) {
4957 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4958 DAG.getUNDEF(VecVT), In1);
4959 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4960 DAG.getUNDEF(VecVT), In2);
4961 } else {
4962 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4963 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4964 }
4965 };
4966
4967 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4968 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4969 EltMask = 0x80000000ULL;
4970 setVecVal(AArch64::ssub);
4971 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4972 VecVT = MVT::v2i64;
4973
4974 // We want to materialize a mask with the high bit set, but the AdvSIMD
4975 // immediate moves cannot materialize that in a single instruction for
4976 // 64-bit elements. Instead, materialize zero and then negate it.
4977 EltMask = 0;
4978
4979 setVecVal(AArch64::dsub);
4980 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4981 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4982 EltMask = 0x8000ULL;
4983 setVecVal(AArch64::hsub);
4984 } else {
4985    llvm_unreachable("Invalid type for copysign!");
4986 }
4987
4988 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4989
4990 // If we couldn't materialize the mask above, then the mask vector will be
4991 // the zero vector, and we need to negate it here.
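  // (FNEG of +0.0 flips only the sign bit, so the all-zero vector becomes
  // <0x8000000000000000, 0x8000000000000000>, exactly the f64 sign-bit mask.)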
4992 if (VT == MVT::f64 || VT == MVT::v2f64) {
4993 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4994 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4995 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4996 }
4997
4998 SDValue Sel =
4999 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
5000
5001 if (VT == MVT::f16)
5002 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
5003 if (VT == MVT::f32)
5004 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
5005 else if (VT == MVT::f64)
5006 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
5007 else
5008 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
5009}
5010
5011SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
5012 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
5013 Attribute::NoImplicitFloat))
5014 return SDValue();
5015
5016 if (!Subtarget->hasNEON())
5017 return SDValue();
5018
5019 // While there is no integer popcount instruction, it can
5020 // be more efficiently lowered to the following sequence that uses
5021 // AdvSIMD registers/instructions as long as the copies to/from
5022 // the AdvSIMD registers are cheap.
5023 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
5024 // CNT V0.8B, V0.8B // 8xbyte pop-counts
5025 // ADDV B0, V0.8B // sum 8xbyte pop-counts
5026 // UMOV X0, V0.B[0] // copy byte result back to integer reg
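  // For example, CTPOP of the i64 value 0x00FF00FF: CNT produces per-byte
  // counts <8,0,8,0,0,0,0,0>, and the add-across-vector reduction sums them
  // to 16.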
5027 SDValue Val = Op.getOperand(0);
5028 SDLoc DL(Op);
5029 EVT VT = Op.getValueType();
5030
5031 if (VT == MVT::i32 || VT == MVT::i64) {
5032 if (VT == MVT::i32)
5033 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
5034 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
5035
5036 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
5037 SDValue UaddLV = DAG.getNode(
5038 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
5039 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
5040
5041 if (VT == MVT::i64)
5042 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
5043 return UaddLV;
5044 }
5045
5046  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
5047          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
5048         "Unexpected type for custom ctpop lowering");
5049
5050 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5051 Val = DAG.getBitcast(VT8Bit, Val);
5052 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
5053
5054 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
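  // For example, for VT == v4i32: start with v16i8 byte counts, then
  // uaddlp v16i8 -> v8i16, then uaddlp v8i16 -> v4i32.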
5055 unsigned EltSize = 8;
5056 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
5057 while (EltSize != VT.getScalarSizeInBits()) {
5058 EltSize *= 2;
5059 NumElts /= 2;
5060 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
5061 Val = DAG.getNode(
5062 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
5063 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
5064 }
5065
5066 return Val;
5067}
5068
5069SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
5070
5071 if (Op.getValueType().isVector())
5072 return LowerVSETCC(Op, DAG);
5073
5074 SDValue LHS = Op.getOperand(0);
5075 SDValue RHS = Op.getOperand(1);
5076 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5077 SDLoc dl(Op);
5078
5079 // We chose ZeroOrOneBooleanContents, so use zero and one.
5080 EVT VT = Op.getValueType();
5081 SDValue TVal = DAG.getConstant(1, dl, VT);
5082 SDValue FVal = DAG.getConstant(0, dl, VT);
5083
5084 // Handle f128 first, since one possible outcome is a normal integer
5085 // comparison which gets picked up by the next if statement.
5086 if (LHS.getValueType() == MVT::f128) {
5087 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5088
5089 // If softenSetCCOperands returned a scalar, use it.
5090 if (!RHS.getNode()) {
5091      assert(LHS.getValueType() == Op.getValueType() &&
5092             "Unexpected setcc expansion!");
5093 return LHS;
5094 }
5095 }
5096
5097 if (LHS.getValueType().isInteger()) {
5098 SDValue CCVal;
5099 SDValue Cmp = getAArch64Cmp(
5100 LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
5101
5102 // Note that we inverted the condition above, so we reverse the order of
5103 // the true and false operands here. This will allow the setcc to be
5104 // matched to a single CSINC instruction.
5105 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
5106 }
5107
5108 // Now we know we're dealing with FP values.
5109  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5110         LHS.getValueType() == MVT::f64);
5111
5112 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
5113 // and do the comparison.
5114 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5115
5116 AArch64CC::CondCode CC1, CC2;
5117 changeFPCCToAArch64CC(CC, CC1, CC2);
5118 if (CC2 == AArch64CC::AL) {
5119 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
5120 CC2);
5121 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5122
5123 // Note that we inverted the condition above, so we reverse the order of
5124 // the true and false operands here. This will allow the setcc to be
5125 // matched to a single CSINC instruction.
5126 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
5127 } else {
5128 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
5129 // totally clean. Some of them require two CSELs to implement. As is in
5130 // this case, we emit the first CSEL and then emit a second using the output
5131 // of the first as the RHS. We're effectively OR'ing the two CC's together.
5132
5133 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
5134 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5135 SDValue CS1 =
5136 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5137
5138 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5139 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5140 }
5141}
5142
5143SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
5144 SDValue RHS, SDValue TVal,
5145 SDValue FVal, const SDLoc &dl,
5146 SelectionDAG &DAG) const {
5147 // Handle f128 first, because it will result in a comparison of some RTLIB
5148 // call result against zero.
5149 if (LHS.getValueType() == MVT::f128) {
5150 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5151
5152 // If softenSetCCOperands returned a scalar, we need to compare the result
5153 // against zero to select between true and false values.
5154 if (!RHS.getNode()) {
5155 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5156 CC = ISD::SETNE;
5157 }
5158 }
5159
5160 // Also handle f16, for which we need to do a f32 comparison.
5161 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5162 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
5163 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
5164 }
5165
5166 // Next, handle integers.
5167 if (LHS.getValueType().isInteger()) {
5168    assert((LHS.getValueType() == RHS.getValueType()) &&
5169           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
5170
5171 unsigned Opcode = AArch64ISD::CSEL;
5172
5173 // If both the TVal and the FVal are constants, see if we can swap them in
5174    // order to form a CSINV or CSINC out of them.
5175 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
5176 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
5177
5178 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
5179 std::swap(TVal, FVal);
5180 std::swap(CTVal, CFVal);
5181 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5182 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
5183 std::swap(TVal, FVal);
5184 std::swap(CTVal, CFVal);
5185 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5186 } else if (TVal.getOpcode() == ISD::XOR) {
5187 // If TVal is a NOT we want to swap TVal and FVal so that we can match
5188 // with a CSINV rather than a CSEL.
5189 if (isAllOnesConstant(TVal.getOperand(1))) {
5190 std::swap(TVal, FVal);
5191 std::swap(CTVal, CFVal);
5192 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5193 }
5194 } else if (TVal.getOpcode() == ISD::SUB) {
5195 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
5196 // that we can match with a CSNEG rather than a CSEL.
5197 if (isNullConstant(TVal.getOperand(0))) {
5198 std::swap(TVal, FVal);
5199 std::swap(CTVal, CFVal);
5200 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5201 }
5202 } else if (CTVal && CFVal) {
5203 const int64_t TrueVal = CTVal->getSExtValue();
5204 const int64_t FalseVal = CFVal->getSExtValue();
5205 bool Swap = false;
5206
5207 // If both TVal and FVal are constants, see if FVal is the
5208 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
5209 // instead of a CSEL in that case.
5210 if (TrueVal == ~FalseVal) {
5211 Opcode = AArch64ISD::CSINV;
5212 } else if (TrueVal == -FalseVal) {
5213 Opcode = AArch64ISD::CSNEG;
5214 } else if (TVal.getValueType() == MVT::i32) {
5215 // If our operands are only 32-bit wide, make sure we use 32-bit
5216 // arithmetic for the check whether we can use CSINC. This ensures that
5217 // the addition in the check will wrap around properly in case there is
5218 // an overflow (which would not be the case if we do the check with
5219 // 64-bit arithmetic).
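      // For example, TVal == INT32_MAX and FVal == INT32_MIN: in 32-bit
      // arithmetic TrueVal32 + 1 wraps to FalseVal32, so CSINC applies, while
      // the sign-extended 64-bit check below would miss it.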
5220 const uint32_t TrueVal32 = CTVal->getZExtValue();
5221 const uint32_t FalseVal32 = CFVal->getZExtValue();
5222
5223 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
5224 Opcode = AArch64ISD::CSINC;
5225
5226 if (TrueVal32 > FalseVal32) {
5227 Swap = true;
5228 }
5229 }
5230 // 64-bit check whether we can use CSINC.
5231 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
5232 Opcode = AArch64ISD::CSINC;
5233
5234 if (TrueVal > FalseVal) {
5235 Swap = true;
5236 }
5237 }
5238
5239 // Swap TVal and FVal if necessary.
5240 if (Swap) {
5241 std::swap(TVal, FVal);
5242 std::swap(CTVal, CFVal);
5243 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5244 }
5245
5246 if (Opcode != AArch64ISD::CSEL) {
5247 // Drop FVal since we can get its value by simply inverting/negating
5248 // TVal.
5249 FVal = TVal;
5250 }
5251 }
5252
5253 // Avoid materializing a constant when possible by reusing a known value in
5254 // a register. However, don't perform this optimization if the known value
5255 // is one, zero or negative one in the case of a CSEL. We can always
5256 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
5257 // FVal, respectively.
5258 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
5259 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
5260 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
5261 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5262 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
5263 // "a != C ? x : a" to avoid materializing C.
5264 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
5265 TVal = LHS;
5266 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
5267 FVal = LHS;
5268 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
5269      assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
5270 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
5271 // avoid materializing C.
5272 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5273 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
5274 Opcode = AArch64ISD::CSINV;
5275 TVal = LHS;
5276 FVal = DAG.getConstant(0, dl, FVal.getValueType());
5277 }
5278 }
5279
5280 SDValue CCVal;
5281 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
5282 EVT VT = TVal.getValueType();
5283 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
5284 }
5285
5286 // Now we know we're dealing with FP values.
5287  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5288         LHS.getValueType() == MVT::f64);
5289  assert(LHS.getValueType() == RHS.getValueType());
5290 EVT VT = TVal.getValueType();
5291 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5292
5293 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
5294 // clean. Some of them require two CSELs to implement.
5295 AArch64CC::CondCode CC1, CC2;
5296 changeFPCCToAArch64CC(CC, CC1, CC2);
5297
5298 if (DAG.getTarget().Options.UnsafeFPMath) {
5299 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
5300 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
5301 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
5302 if (RHSVal && RHSVal->isZero()) {
5303 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
5304 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
5305
5306 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
5307 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
5308 TVal = LHS;
5309 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
5310 CFVal && CFVal->isZero() &&
5311 FVal.getValueType() == LHS.getValueType())
5312 FVal = LHS;
5313 }
5314 }
5315
5316 // Emit first, and possibly only, CSEL.
5317 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5318 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5319
5320 // If we need a second CSEL, emit it, using the output of the first as the
5321 // RHS. We're effectively OR'ing the two CC's together.
5322 if (CC2 != AArch64CC::AL) {
5323 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5324 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5325 }
5326
5327 // Otherwise, return the output of the first CSEL.
5328 return CS1;
5329}
5330
5331SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
5332 SelectionDAG &DAG) const {
5333 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5334 SDValue LHS = Op.getOperand(0);
5335 SDValue RHS = Op.getOperand(1);
5336 SDValue TVal = Op.getOperand(2);
5337 SDValue FVal = Op.getOperand(3);
5338 SDLoc DL(Op);
5339 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5340}
5341
5342SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
5343 SelectionDAG &DAG) const {
5344 SDValue CCVal = Op->getOperand(0);
5345 SDValue TVal = Op->getOperand(1);
5346 SDValue FVal = Op->getOperand(2);
5347 SDLoc DL(Op);
5348
5349 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
5350 // instruction.
5351 if (isOverflowIntrOpRes(CCVal)) {
5352 // Only lower legal XALUO ops.
5353 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
5354 return SDValue();
5355
5356 AArch64CC::CondCode OFCC;
5357 SDValue Value, Overflow;
5358 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
5359 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
5360
5361 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
5362 CCVal, Overflow);
5363 }
5364
5365 // Lower it the same way as we would lower a SELECT_CC node.
5366 ISD::CondCode CC;
5367 SDValue LHS, RHS;
5368 if (CCVal.getOpcode() == ISD::SETCC) {
5369 LHS = CCVal.getOperand(0);
5370 RHS = CCVal.getOperand(1);
5371 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
5372 } else {
5373 LHS = CCVal;
5374 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
5375 CC = ISD::SETNE;
5376 }
5377 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5378}
5379
5380SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
5381 SelectionDAG &DAG) const {
5382  // Jump table entries are PC-relative offsets. No additional tweaking
5383  // is necessary here; just get the address of the jump table.
5384 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
5385
5386 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5387 !Subtarget->isTargetMachO()) {
5388 return getAddrLarge(JT, DAG);
5389 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5390 return getAddrTiny(JT, DAG);
5391 }
5392 return getAddr(JT, DAG);
5393}
5394
5395SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
5396 SelectionDAG &DAG) const {
5397  // Jump table entries are PC-relative offsets. No additional tweaking
5398  // is necessary here; just get the address of the jump table.
5399 SDLoc DL(Op);
5400 SDValue JT = Op.getOperand(1);
5401 SDValue Entry = Op.getOperand(2);
5402 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
5403
5404 SDNode *Dest =
5405 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
5406 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
5407 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
5408 SDValue(Dest, 0));
5409}
5410
5411SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
5412 SelectionDAG &DAG) const {
5413 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
5414
5415 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
5416 // Use the GOT for the large code model on iOS.
5417 if (Subtarget->isTargetMachO()) {
5418 return getGOT(CP, DAG);
5419 }
5420 return getAddrLarge(CP, DAG);
5421 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5422 return getAddrTiny(CP, DAG);
5423 } else {
5424 return getAddr(CP, DAG);
5425 }
5426}
5427
5428SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
5429 SelectionDAG &DAG) const {
5430 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
5431 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5432 !Subtarget->isTargetMachO()) {
5433 return getAddrLarge(BA, DAG);
5434 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5435 return getAddrTiny(BA, DAG);
5436 }
5437 return getAddr(BA, DAG);
5438}
5439
5440SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
5441 SelectionDAG &DAG) const {
5442 AArch64FunctionInfo *FuncInfo =
5443 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5444
5445 SDLoc DL(Op);
5446 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
5447 getPointerTy(DAG.getDataLayout()));
5448 FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
5449 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5450 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5451 MachinePointerInfo(SV));
5452}
5453
5454SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
5455 SelectionDAG &DAG) const {
5456 AArch64FunctionInfo *FuncInfo =
5457 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5458
5459 SDLoc DL(Op);
5460 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
5461 ? FuncInfo->getVarArgsGPRIndex()
5462 : FuncInfo->getVarArgsStackIndex(),
5463 getPointerTy(DAG.getDataLayout()));
5464 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5465 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5466 MachinePointerInfo(SV));
5467}
5468
5469SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
5470 SelectionDAG &DAG) const {
5471 // The layout of the va_list struct is specified in the AArch64 Procedure Call
5472 // Standard, section B.3.
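  // The va_list object initialized below is laid out as (offsets match the
  // stores that follow):
  //   struct va_list {
  //     void *__stack;   // offset 0
  //     void *__gr_top;  // offset 8
  //     void *__vr_top;  // offset 16
  //     int   __gr_offs; // offset 24
  //     int   __vr_offs; // offset 28
  //   };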
5473 MachineFunction &MF = DAG.getMachineFunction();
5474 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5475 auto PtrVT = getPointerTy(DAG.getDataLayout());
5476 SDLoc DL(Op);
5477
5478 SDValue Chain = Op.getOperand(0);
5479 SDValue VAList = Op.getOperand(1);
5480 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5481 SmallVector<SDValue, 4> MemOps;
5482
5483 // void *__stack at offset 0
5484 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
5485 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
5486 MachinePointerInfo(SV), /* Alignment = */ 8));
5487
5488 // void *__gr_top at offset 8
5489 int GPRSize = FuncInfo->getVarArgsGPRSize();
5490 if (GPRSize > 0) {
5491 SDValue GRTop, GRTopAddr;
5492
5493 GRTopAddr =
5494 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
5495
5496 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
5497 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
5498 DAG.getConstant(GPRSize, DL, PtrVT));
5499
5500 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
5501 MachinePointerInfo(SV, 8),
5502 /* Alignment = */ 8));
5503 }
5504
5505 // void *__vr_top at offset 16
5506 int FPRSize = FuncInfo->getVarArgsFPRSize();
5507 if (FPRSize > 0) {
5508 SDValue VRTop, VRTopAddr;
5509 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5510 DAG.getConstant(16, DL, PtrVT));
5511
5512 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
5513 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
5514 DAG.getConstant(FPRSize, DL, PtrVT));
5515
5516 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
5517 MachinePointerInfo(SV, 16),
5518 /* Alignment = */ 8));
5519 }
5520
5521 // int __gr_offs at offset 24
5522 SDValue GROffsAddr =
5523 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
5524 MemOps.push_back(DAG.getStore(
5525 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
5526 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
5527
5528 // int __vr_offs at offset 28
5529 SDValue VROffsAddr =
5530 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
5531 MemOps.push_back(DAG.getStore(
5532 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
5533 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
5534
5535 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5536}
5537
5538SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
5539 SelectionDAG &DAG) const {
5540 MachineFunction &MF = DAG.getMachineFunction();
5541
5542 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
5543 return LowerWin64_VASTART(Op, DAG);
5544 else if (Subtarget->isTargetDarwin())
5545 return LowerDarwin_VASTART(Op, DAG);
5546 else
5547 return LowerAAPCS_VASTART(Op, DAG);
5548}
5549
5550SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
5551 SelectionDAG &DAG) const {
5552  // AAPCS has three pointers and two ints (= 32 bytes); Darwin and Windows
5553  // use a single pointer.
5554 SDLoc DL(Op);
5555 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
5556 unsigned VaListSize = (Subtarget->isTargetDarwin() ||
5557 Subtarget->isTargetWindows()) ? PtrSize : 32;
5558 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
5559 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
5560
5561 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
5562 DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize,
5563 false, false, false, MachinePointerInfo(DestSV),
5564 MachinePointerInfo(SrcSV));
5565}
5566
5567SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
5568  assert(Subtarget->isTargetDarwin() &&
5569         "automatic va_arg instruction only works on Darwin");
5570
5571 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5572 EVT VT = Op.getValueType();
5573 SDLoc DL(Op);
5574 SDValue Chain = Op.getOperand(0);
5575 SDValue Addr = Op.getOperand(1);
5576 unsigned Align = Op.getConstantOperandVal(3);
5577 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
5578 auto PtrVT = getPointerTy(DAG.getDataLayout());
5579 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
5580 SDValue VAList =
5581 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
5582 Chain = VAList.getValue(1);
5583 VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
5584
5585 if (Align > MinSlotSize) {
5586    assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
5587 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5588 DAG.getConstant(Align - 1, DL, PtrVT));
5589 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
5590 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
5591 }
5592
5593 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5594 unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
5595
5596 // Scalar integer and FP values smaller than 64 bits are implicitly extended
5597 // up to 64 bits. At the very least, we have to increase the striding of the
5598 // vaargs list to match this, and for FP values we need to introduce
5599 // FP_ROUND nodes as well.
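  // For example, va_arg(ap, float) on Darwin: the slot holds an f64, so the
  // NeedFPTrunc path below loads an f64 and FP_ROUNDs it down to f32.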
5600 if (VT.isInteger() && !VT.isVector())
5601 ArgSize = std::max(ArgSize, MinSlotSize);
5602 bool NeedFPTrunc = false;
5603 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
5604 ArgSize = 8;
5605 NeedFPTrunc = true;
5606 }
5607
5608 // Increment the pointer, VAList, to the next vaarg
5609 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5610 DAG.getConstant(ArgSize, DL, PtrVT));
5611 VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
5612
5613 // Store the incremented VAList to the legalized pointer
5614 SDValue APStore =
5615 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
5616
5617 // Load the actual argument out of the pointer VAList
5618 if (NeedFPTrunc) {
5619 // Load the value as an f64.
5620 SDValue WideFP =
5621 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
5622 // Round the value down to an f32.
5623 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
5624 DAG.getIntPtrConstant(1, DL));
5625 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
5626 // Merge the rounded value with the chain output of the load.
5627 return DAG.getMergeValues(Ops, DL);
5628 }
5629
5630 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
5631}
5632
5633SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
5634 SelectionDAG &DAG) const {
5635 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5636 MFI.setFrameAddressIsTaken(true);
5637
5638 EVT VT = Op.getValueType();
5639 SDLoc DL(Op);
5640 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5641 SDValue FrameAddr =
5642 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
5643 while (Depth--)
5644 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
5645 MachinePointerInfo());
5646
5647 if (Subtarget->isTargetILP32())
5648 FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
5649 DAG.getValueType(VT));
5650
5651 return FrameAddr;
5652}
5653
5654SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
5655 SelectionDAG &DAG) const {
5656 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5657
5658 EVT VT = getPointerTy(DAG.getDataLayout());
5659 SDLoc DL(Op);
5660 int FI = MFI.CreateFixedObject(4, 0, false);
5661 return DAG.getFrameIndex(FI, VT);
5662}
5663
5664#define GET_REGISTER_MATCHER
5665#include "AArch64GenAsmMatcher.inc"
5666
5667// FIXME? Maybe this could be a TableGen attribute on some registers and
5668// this table could be generated automatically from RegInfo.
5669Register AArch64TargetLowering::
5670getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
5671 Register Reg = MatchRegisterName(RegName);
5672 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
5673 const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
5674 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
5675 if (!Subtarget->isXRegisterReserved(DwarfRegNum))
5676 Reg = 0;
5677 }
5678 if (Reg)
5679 return Reg;
5680 report_fatal_error(Twine("Invalid register name \""
5681 + StringRef(RegName) + "\"."));
5682}
5683
5684SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
5685 SelectionDAG &DAG) const {
5686 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
5687
5688 EVT VT = Op.getValueType();
5689 SDLoc DL(Op);
5690
5691 SDValue FrameAddr =
5692 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5693 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5694
5695 return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
5696}
5697
5698SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
5699 SelectionDAG &DAG) const {
5700 MachineFunction &MF = DAG.getMachineFunction();
5701 MachineFrameInfo &MFI = MF.getFrameInfo();
5702 MFI.setReturnAddressIsTaken(true);
5703
5704 EVT VT = Op.getValueType();
5705 SDLoc DL(Op);
5706 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5707 if (Depth) {
5708 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5709 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5710 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
5711 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
5712 MachinePointerInfo());
5713 }
5714
5715 // Return LR, which contains the return address. Mark it an implicit live-in.
5716 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
5717 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
5718}
5719
5720/// LowerShiftRightParts - Lower SRA_PARTS/SRL_PARTS, which return two
5721/// i64 values and take a 2 x i64 value to shift plus a shift amount.
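/// As a sketch of the expansion below, for a right shift by ShAmt < 64:
///   Lo = (ShOpLo >> ShAmt) | (ShOpHi << (64 - ShAmt)),  Hi = ShOpHi >> ShAmt
/// and for ShAmt >= 64:
///   Lo = ShOpHi >> (ShAmt - 64),  Hi = 0 (or all sign bits for SRA_PARTS),
/// with CSELs selecting between the two cases and guarding the ShAmt == 0
/// "shift by 64 is undef" corner case.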
5722SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
5723 SelectionDAG &DAG) const {
5724  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5725 EVT VT = Op.getValueType();
5726 unsigned VTBits = VT.getSizeInBits();
5727 SDLoc dl(Op);
5728 SDValue ShOpLo = Op.getOperand(0);
5729 SDValue ShOpHi = Op.getOperand(1);
5730 SDValue ShAmt = Op.getOperand(2);
5731 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5732
5733  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5734
5735 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5736 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5737 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5738
5739 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
5740 // is "undef". We wanted 0, so CSEL it directly.
5741 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5742 ISD::SETEQ, dl, DAG);
5743 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5744 HiBitsForLo =
5745 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5746 HiBitsForLo, CCVal, Cmp);
5747
5748 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5749 DAG.getConstant(VTBits, dl, MVT::i64));
5750
5751 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5752 SDValue LoForNormalShift =
5753 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
5754
5755 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5756 dl, DAG);
5757 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5758 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5759 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5760 LoForNormalShift, CCVal, Cmp);
5761
5762 // AArch64 shifts larger than the register width are wrapped rather than
5763 // clamped, so we can't just emit "hi >> x".
5764 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5765 SDValue HiForBigShift =
5766 Opc == ISD::SRA
5767 ? DAG.getNode(Opc, dl, VT, ShOpHi,
5768 DAG.getConstant(VTBits - 1, dl, MVT::i64))
5769 : DAG.getConstant(0, dl, VT);
5770 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5771 HiForNormalShift, CCVal, Cmp);
5772
5773 SDValue Ops[2] = { Lo, Hi };
5774 return DAG.getMergeValues(Ops, dl);
5775}
5776
5777/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5778/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
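/// As a sketch of the expansion below, for a left shift by ShAmt < 64:
///   Hi = (ShOpHi << ShAmt) | (ShOpLo >> (64 - ShAmt)),  Lo = ShOpLo << ShAmt
/// and for ShAmt >= 64:
///   Hi = ShOpLo << (ShAmt - 64),  Lo = 0,
/// again using CSELs to handle the ShAmt == 0 corner case.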
5779SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
5780 SelectionDAG &DAG) const {
5781  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5782 EVT VT = Op.getValueType();
5783 unsigned VTBits = VT.getSizeInBits();
5784 SDLoc dl(Op);
5785 SDValue ShOpLo = Op.getOperand(0);
5786 SDValue ShOpHi = Op.getOperand(1);
5787 SDValue ShAmt = Op.getOperand(2);
5788
5789  assert(Op.getOpcode() == ISD::SHL_PARTS);
5790 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5791 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5792 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5793
5794 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
5795 // is "undef". We wanted 0, so CSEL it directly.
5796 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5797 ISD::SETEQ, dl, DAG);
5798 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5799 LoBitsForHi =
5800 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5801 LoBitsForHi, CCVal, Cmp);
5802
5803 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5804 DAG.getConstant(VTBits, dl, MVT::i64));
5805 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5806 SDValue HiForNormalShift =
5807 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
5808
5809 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5810
5811 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5812 dl, DAG);
5813 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5814 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5815 HiForNormalShift, CCVal, Cmp);
5816
5817 // AArch64 shifts of larger than register sizes are wrapped rather than
5818 // clamped, so we can't just emit "lo << a" if a is too big.
5819 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
5820 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5821 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5822 LoForNormalShift, CCVal, Cmp);
5823
5824 SDValue Ops[2] = { Lo, Hi };
5825 return DAG.getMergeValues(Ops, dl);
5826}
5827
5828bool AArch64TargetLowering::isOffsetFoldingLegal(
5829 const GlobalAddressSDNode *GA) const {
5830 // Offsets are folded in the DAG combine rather than here so that we can
5831 // intelligently choose an offset based on the uses.
5832 return false;
5833}
5834
5835bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5836 bool OptForSize) const {
5837 bool IsLegal = false;
5838 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
5839 // 16-bit case when target has full fp16 support.
5840 // FIXME: We should be able to handle f128 as well with a clever lowering.
5841 const APInt ImmInt = Imm.bitcastToAPInt();
5842 if (VT == MVT::f64)
5843 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
5844 else if (VT == MVT::f32)
5845 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
5846 else if (VT == MVT::f16 && Subtarget->hasFullFP16())
5847 IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
5848 // TODO: fmov h0, w0 is also legal, however on't have an isel pattern to
5849 // generate that fmov.
5850
5851 // If we can not materialize in immediate field for fmov, check if the
5852 // value can be encoded as the immediate operand of a logical instruction.
5853 // The immediate value will be created with either MOVZ, MOVN, or ORR.
5854 if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
5855 // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
5856 // however the mov+fmov sequence is always better because of the reduced
5857 // cache pressure. The timings are still the same if you consider
5858 // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
5859    // movw+movk is fused). So we limit to at most 2 instructions.
5860 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
5861 AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
5862 Insn);
5863 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
5864 IsLegal = Insn.size() <= Limit;
5865 }
5866
5867  LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
5868                    << " imm value: "; Imm.dump(););
5869 return IsLegal;
5870}
5871
5872//===----------------------------------------------------------------------===//
5873// AArch64 Optimization Hooks
5874//===----------------------------------------------------------------------===//
5875
5876static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
5877 SDValue Operand, SelectionDAG &DAG,
5878 int &ExtraSteps) {
5879 EVT VT = Operand.getValueType();
5880 if (ST->hasNEON() &&
5881 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
5882 VT == MVT::f32 || VT == MVT::v1f32 ||
5883 VT == MVT::v2f32 || VT == MVT::v4f32)) {
5884 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
5885 // For the reciprocal estimates, convergence is quadratic, so the number
5886 // of digits is doubled after each iteration. In ARMv8, the accuracy of
5887 // the initial estimate is 2^-8. Thus the number of extra steps to refine
5888 // the result for float (23 mantissa bits) is 2 and for double (52
5889 // mantissa bits) is 3.
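      // Concretely: an 8-bit estimate gives roughly 16 correct bits after one
      // step, ~32 after two (enough for f32's 23 fraction bits) and ~64 after
      // three (enough for f64's 52 fraction bits).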
5890 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
5891
5892 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
5893 }
5894
5895 return SDValue();
5896}
5897
5898SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
5899 SelectionDAG &DAG, int Enabled,
5900 int &ExtraSteps,
5901 bool &UseOneConst,
5902 bool Reciprocal) const {
5903 if (Enabled == ReciprocalEstimate::Enabled ||
5904 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5905 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5906 DAG, ExtraSteps)) {
5907 SDLoc DL(Operand