Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1150, column 10
Called C++ object pointer is null
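
The diagnostic means that, on the path the analyzer traced through AArch64ISelLowering.cpp, a member function ends up being invoked through a null object pointer at SelectionDAGNodes.h, line 1150. As a minimal, hypothetical sketch of the pattern this checker flags (illustrative only; the Node type and opcodeOf helper below are not the actual traced code):

    struct Node {
      unsigned Opcode = 0;
      unsigned getOpcode() const { return Opcode; } // member call requires a valid 'this'
    };

    unsigned opcodeOf(const Node *N) {
      // If any feasible path reaches this call with N == nullptr, the analyzer
      // reports "Called C++ object pointer is null" at the call site.
      return N->getOpcode();
    }

Guarding the call (for example, 'if (!N) return 0;') or otherwise proving the pointer non-null on every feasible path is the usual way to address this class of report.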

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/StringSwitch.h"
31#include "llvm/ADT/Triple.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/CallingConvLower.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineInstr.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/IntrinsicInst.h"
60#include "llvm/IR/Intrinsics.h"
61#include "llvm/IR/IntrinsicsAArch64.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/OperandTraits.h"
64#include "llvm/IR/PatternMatch.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/Use.h"
67#include "llvm/IR/Value.h"
68#include "llvm/MC/MCRegisterInfo.h"
69#include "llvm/Support/Casting.h"
70#include "llvm/Support/CodeGen.h"
71#include "llvm/Support/CommandLine.h"
72#include "llvm/Support/Compiler.h"
73#include "llvm/Support/Debug.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/KnownBits.h"
76#include "llvm/Support/MachineValueType.h"
77#include "llvm/Support/MathExtras.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Target/TargetMachine.h"
80#include "llvm/Target/TargetOptions.h"
81#include <algorithm>
82#include <bitset>
83#include <cassert>
84#include <cctype>
85#include <cstdint>
86#include <cstdlib>
87#include <iterator>
88#include <limits>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace llvm::PatternMatch;
95
96#define DEBUG_TYPE "aarch64-lower"
97
98STATISTIC(NumTailCalls, "Number of tail calls");
99STATISTIC(NumShiftInserts, "Number of vector shift inserts");
100STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
101
102static cl::opt<bool>
103EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
104 cl::desc("Allow AArch64 SLI/SRI formation"),
105 cl::init(false));
106
107// FIXME: The necessary dtprel relocations don't seem to be supported
108// well in the GNU bfd and gold linkers at the moment. Therefore, by
109// default, for now, fall back to GeneralDynamic code generation.
110cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
111 "aarch64-elf-ldtls-generation", cl::Hidden,
112 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
113 cl::init(false));
114
115static cl::opt<bool>
116EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
117 cl::desc("Enable AArch64 logical imm instruction "
118 "optimization"),
119 cl::init(true));
120
121/// Value type used for condition codes.
122static const MVT MVT_CC = MVT::i32;
123
124AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
125 const AArch64Subtarget &STI)
126 : TargetLowering(TM), Subtarget(&STI) {
127 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
128 // we have to make something up. Arbitrarily, choose ZeroOrOne.
129 setBooleanContents(ZeroOrOneBooleanContent);
130 // When comparing vectors the result sets the different elements in the
131 // vector to all-one or all-zero.
132 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
133
134 // Set up the register classes.
135 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
136 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
137
138 if (Subtarget->hasFPARMv8()) {
139 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
140 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
141 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
142 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
143 }
144
145 if (Subtarget->hasNEON()) {
146 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
147 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
148 // Someone set us up the NEON.
149 addDRTypeForNEON(MVT::v2f32);
150 addDRTypeForNEON(MVT::v8i8);
151 addDRTypeForNEON(MVT::v4i16);
152 addDRTypeForNEON(MVT::v2i32);
153 addDRTypeForNEON(MVT::v1i64);
154 addDRTypeForNEON(MVT::v1f64);
155 addDRTypeForNEON(MVT::v4f16);
156
157 addQRTypeForNEON(MVT::v4f32);
158 addQRTypeForNEON(MVT::v2f64);
159 addQRTypeForNEON(MVT::v16i8);
160 addQRTypeForNEON(MVT::v8i16);
161 addQRTypeForNEON(MVT::v4i32);
162 addQRTypeForNEON(MVT::v2i64);
163 addQRTypeForNEON(MVT::v8f16);
164 }
165
166 if (Subtarget->hasSVE()) {
167 // Add legal sve predicate types
168 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
169 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
170 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
171 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
172
173 // Add legal sve data types
174 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
175 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
176 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
177 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
178
179 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
180 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
181 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
182 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
183 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
184 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
185
186 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
187 setOperationAction(ISD::SADDSAT, VT, Legal);
188 setOperationAction(ISD::UADDSAT, VT, Legal);
189 setOperationAction(ISD::SSUBSAT, VT, Legal);
190 setOperationAction(ISD::USUBSAT, VT, Legal);
191 }
192
193 for (auto VT :
194 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
195 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
196 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
197 }
198
199 // Compute derived properties from the register classes
200 computeRegisterProperties(Subtarget->getRegisterInfo());
201
202 // Provide all sorts of operation actions
203 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
204 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
205 setOperationAction(ISD::SETCC, MVT::i32, Custom);
206 setOperationAction(ISD::SETCC, MVT::i64, Custom);
207 setOperationAction(ISD::SETCC, MVT::f16, Custom);
208 setOperationAction(ISD::SETCC, MVT::f32, Custom);
209 setOperationAction(ISD::SETCC, MVT::f64, Custom);
210 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
211 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
212 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
213 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
214 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
215 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
216 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
217 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
218 setOperationAction(ISD::SELECT, MVT::i32, Custom);
219 setOperationAction(ISD::SELECT, MVT::i64, Custom);
220 setOperationAction(ISD::SELECT, MVT::f16, Custom);
221 setOperationAction(ISD::SELECT, MVT::f32, Custom);
222 setOperationAction(ISD::SELECT, MVT::f64, Custom);
223 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
224 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
225 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
226 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
227 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
228 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
229 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
230
231 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
232 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
233 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
234
235 setOperationAction(ISD::FREM, MVT::f32, Expand);
236 setOperationAction(ISD::FREM, MVT::f64, Expand);
237 setOperationAction(ISD::FREM, MVT::f80, Expand);
238
239 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
240
241 // Custom lowering hooks are needed for XOR
242 // to fold it into CSINC/CSINV.
243 setOperationAction(ISD::XOR, MVT::i32, Custom);
244 setOperationAction(ISD::XOR, MVT::i64, Custom);
245
246 // Virtually no operation on f128 is legal, but LLVM can't expand them when
247 // there's a valid register class, so we need custom operations in most cases.
248 setOperationAction(ISD::FABS, MVT::f128, Expand);
249 setOperationAction(ISD::FADD, MVT::f128, Custom);
250 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
251 setOperationAction(ISD::FCOS, MVT::f128, Expand);
252 setOperationAction(ISD::FDIV, MVT::f128, Custom);
253 setOperationAction(ISD::FMA, MVT::f128, Expand);
254 setOperationAction(ISD::FMUL, MVT::f128, Custom);
255 setOperationAction(ISD::FNEG, MVT::f128, Expand);
256 setOperationAction(ISD::FPOW, MVT::f128, Expand);
257 setOperationAction(ISD::FREM, MVT::f128, Expand);
258 setOperationAction(ISD::FRINT, MVT::f128, Expand);
259 setOperationAction(ISD::FSIN, MVT::f128, Expand);
260 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
261 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
262 setOperationAction(ISD::FSUB, MVT::f128, Custom);
263 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
264 setOperationAction(ISD::SETCC, MVT::f128, Custom);
265 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
266 setOperationAction(ISD::SELECT, MVT::f128, Custom);
267 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
268 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
269
270 // Lowering for many of the conversions is actually specified by the non-f128
271 // type. The LowerXXX function will be trivial when f128 isn't involved.
272 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
273 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
274 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
275 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
276 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
277 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
278 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
279 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
280 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
281 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
282 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
283 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
284 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
285 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
286
287 // Variable arguments.
288 setOperationAction(ISD::VASTART, MVT::Other, Custom);
289 setOperationAction(ISD::VAARG, MVT::Other, Custom);
290 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
291 setOperationAction(ISD::VAEND, MVT::Other, Expand);
292
293 // Variable-sized objects.
294 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
295 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
296
297 if (Subtarget->isTargetWindows())
298 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
299 else
300 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
301
302 // Constant pool entries
303 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
304
305 // BlockAddress
306 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
307
308 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
309 setOperationAction(ISD::ADDC, MVT::i32, Custom);
310 setOperationAction(ISD::ADDE, MVT::i32, Custom);
311 setOperationAction(ISD::SUBC, MVT::i32, Custom);
312 setOperationAction(ISD::SUBE, MVT::i32, Custom);
313 setOperationAction(ISD::ADDC, MVT::i64, Custom);
314 setOperationAction(ISD::ADDE, MVT::i64, Custom);
315 setOperationAction(ISD::SUBC, MVT::i64, Custom);
316 setOperationAction(ISD::SUBE, MVT::i64, Custom);
317
318 // AArch64 lacks both left-rotate and popcount instructions.
319 setOperationAction(ISD::ROTL, MVT::i32, Expand);
320 setOperationAction(ISD::ROTL, MVT::i64, Expand);
321 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
322 setOperationAction(ISD::ROTL, VT, Expand);
323 setOperationAction(ISD::ROTR, VT, Expand);
324 }
325
326 // AArch64 doesn't have {U|S}MUL_LOHI.
327 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
328 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
329
330 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
331 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
332
333 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
334 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
335 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
336 setOperationAction(ISD::SDIVREM, VT, Expand);
337 setOperationAction(ISD::UDIVREM, VT, Expand);
338 }
339 setOperationAction(ISD::SREM, MVT::i32, Expand);
340 setOperationAction(ISD::SREM, MVT::i64, Expand);
341 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
342 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
343 setOperationAction(ISD::UREM, MVT::i32, Expand);
344 setOperationAction(ISD::UREM, MVT::i64, Expand);
345
346 // Custom lower Add/Sub/Mul with overflow.
347 setOperationAction(ISD::SADDO, MVT::i32, Custom);
348 setOperationAction(ISD::SADDO, MVT::i64, Custom);
349 setOperationAction(ISD::UADDO, MVT::i32, Custom);
350 setOperationAction(ISD::UADDO, MVT::i64, Custom);
351 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
352 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
353 setOperationAction(ISD::USUBO, MVT::i32, Custom);
354 setOperationAction(ISD::USUBO, MVT::i64, Custom);
355 setOperationAction(ISD::SMULO, MVT::i32, Custom);
356 setOperationAction(ISD::SMULO, MVT::i64, Custom);
357 setOperationAction(ISD::UMULO, MVT::i32, Custom);
358 setOperationAction(ISD::UMULO, MVT::i64, Custom);
359
360 setOperationAction(ISD::FSIN, MVT::f32, Expand);
361 setOperationAction(ISD::FSIN, MVT::f64, Expand);
362 setOperationAction(ISD::FCOS, MVT::f32, Expand);
363 setOperationAction(ISD::FCOS, MVT::f64, Expand);
364 setOperationAction(ISD::FPOW, MVT::f32, Expand);
365 setOperationAction(ISD::FPOW, MVT::f64, Expand);
366 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
367 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
368 if (Subtarget->hasFullFP16())
369 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
370 else
371 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
372
373 setOperationAction(ISD::FREM, MVT::f16, Promote);
374 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
375 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
376 setOperationAction(ISD::FPOW, MVT::f16, Promote);
377 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
378 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
379 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
380 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
381 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
382 setOperationAction(ISD::FCOS, MVT::f16, Promote);
383 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
384 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
385 setOperationAction(ISD::FSIN, MVT::f16, Promote);
386 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
387 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
388 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
389 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
390 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
391 setOperationAction(ISD::FEXP, MVT::f16, Promote);
392 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
393 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
394 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
395 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
396 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
397 setOperationAction(ISD::FLOG, MVT::f16, Promote);
398 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
399 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
400 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
401 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
402 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
403 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
404 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
405 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
406
407 if (!Subtarget->hasFullFP16()) {
408 setOperationAction(ISD::SELECT, MVT::f16, Promote);
409 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
410 setOperationAction(ISD::SETCC, MVT::f16, Promote);
411 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
412 setOperationAction(ISD::FADD, MVT::f16, Promote);
413 setOperationAction(ISD::FSUB, MVT::f16, Promote);
414 setOperationAction(ISD::FMUL, MVT::f16, Promote);
415 setOperationAction(ISD::FDIV, MVT::f16, Promote);
416 setOperationAction(ISD::FMA, MVT::f16, Promote);
417 setOperationAction(ISD::FNEG, MVT::f16, Promote);
418 setOperationAction(ISD::FABS, MVT::f16, Promote);
419 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
420 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
421 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
422 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
423 setOperationAction(ISD::FRINT, MVT::f16, Promote);
424 setOperationAction(ISD::FROUND, MVT::f16, Promote);
425 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
426 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
427 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
428 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
429 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
430
431 // promote v4f16 to v4f32 when that is known to be safe.
432 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
433 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
434 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
435 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
436 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
437 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
438 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
439 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
440
441 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
442 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
443 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
444 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
445 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
446 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
447 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
448 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
449 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
450 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
451 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
452 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
453 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
454 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
455 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
456
457 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
458 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
459 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
460 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
461 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
462 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
463 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
464 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
465 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
466 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
467 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
468 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
469 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
470 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
471 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
472 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
473 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
474 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
475 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
476 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
477 }
478
479 // AArch64 has implementations of a lot of rounding-like FP operations.
480 for (MVT Ty : {MVT::f32, MVT::f64}) {
481 setOperationAction(ISD::FFLOOR, Ty, Legal);
482 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
483 setOperationAction(ISD::FCEIL, Ty, Legal);
484 setOperationAction(ISD::FRINT, Ty, Legal);
485 setOperationAction(ISD::FTRUNC, Ty, Legal);
486 setOperationAction(ISD::FROUND, Ty, Legal);
487 setOperationAction(ISD::FMINNUM, Ty, Legal);
488 setOperationAction(ISD::FMAXNUM, Ty, Legal);
489 setOperationAction(ISD::FMINIMUM, Ty, Legal);
490 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
491 setOperationAction(ISD::LROUND, Ty, Legal);
492 setOperationAction(ISD::LLROUND, Ty, Legal);
493 setOperationAction(ISD::LRINT, Ty, Legal);
494 setOperationAction(ISD::LLRINT, Ty, Legal);
495 }
496
497 if (Subtarget->hasFullFP16()) {
498 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
499 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
500 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
501 setOperationAction(ISD::FRINT, MVT::f16, Legal);
502 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
503 setOperationAction(ISD::FROUND, MVT::f16, Legal);
504 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
505 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
506 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
507 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
508 }
509
510 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
511
512 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
513
514 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
515 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
516 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
517 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
518 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
519
520 // 128-bit loads and stores can be done without expanding
521 setOperationAction(ISD::LOAD, MVT::i128, Custom);
522 setOperationAction(ISD::STORE, MVT::i128, Custom);
523
524 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
525 // This requires the Performance Monitors extension.
526 if (Subtarget->hasPerfMon())
527 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
528
529 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
530 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
531 // Issue __sincos_stret if available.
532 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
533 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
534 } else {
535 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
536 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
537 }
538
539 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
540 // MSVCRT doesn't have powi; fall back to pow
541 setLibcallName(RTLIB::POWI_F32, nullptr);
542 setLibcallName(RTLIB::POWI_F64, nullptr);
543 }
544
545 // Make floating-point constants legal for the large code model, so they don't
546 // become loads from the constant pool.
547 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
548 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
549 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
550 }
551
552 // AArch64 does not have floating-point extending loads, i1 sign-extending
553 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
554 for (MVT VT : MVT::fp_valuetypes()) {
555 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
556 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
557 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
558 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
559 }
560 for (MVT VT : MVT::integer_valuetypes())
561 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
562
563 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
564 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
565 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
566 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
567 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
568 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
569 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
570
571 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
572 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
573
574 // Indexed loads and stores are supported.
575 for (unsigned im = (unsigned)ISD::PRE_INC;
576 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
577 setIndexedLoadAction(im, MVT::i8, Legal);
578 setIndexedLoadAction(im, MVT::i16, Legal);
579 setIndexedLoadAction(im, MVT::i32, Legal);
580 setIndexedLoadAction(im, MVT::i64, Legal);
581 setIndexedLoadAction(im, MVT::f64, Legal);
582 setIndexedLoadAction(im, MVT::f32, Legal);
583 setIndexedLoadAction(im, MVT::f16, Legal);
584 setIndexedStoreAction(im, MVT::i8, Legal);
585 setIndexedStoreAction(im, MVT::i16, Legal);
586 setIndexedStoreAction(im, MVT::i32, Legal);
587 setIndexedStoreAction(im, MVT::i64, Legal);
588 setIndexedStoreAction(im, MVT::f64, Legal);
589 setIndexedStoreAction(im, MVT::f32, Legal);
590 setIndexedStoreAction(im, MVT::f16, Legal);
591 }
592
593 // Trap.
594 setOperationAction(ISD::TRAP, MVT::Other, Legal);
595 if (Subtarget->isTargetWindows())
596 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
597
598 // We combine OR nodes for bitfield operations.
599 setTargetDAGCombine(ISD::OR);
600 // Try to create BICs for vector ANDs.
601 setTargetDAGCombine(ISD::AND);
602
603 // Vector add and sub nodes may conceal a high-half opportunity.
604 // Also, try to fold ADD into CSINC/CSINV..
605 setTargetDAGCombine(ISD::ADD);
606 setTargetDAGCombine(ISD::SUB);
607 setTargetDAGCombine(ISD::SRL);
608 setTargetDAGCombine(ISD::XOR);
609 setTargetDAGCombine(ISD::SINT_TO_FP);
610 setTargetDAGCombine(ISD::UINT_TO_FP);
611
612 setTargetDAGCombine(ISD::FP_TO_SINT);
613 setTargetDAGCombine(ISD::FP_TO_UINT);
614 setTargetDAGCombine(ISD::FDIV);
615
616 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
617
618 setTargetDAGCombine(ISD::ANY_EXTEND);
619 setTargetDAGCombine(ISD::ZERO_EXTEND);
620 setTargetDAGCombine(ISD::SIGN_EXTEND);
621 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
622 setTargetDAGCombine(ISD::CONCAT_VECTORS);
623 setTargetDAGCombine(ISD::STORE);
624 if (Subtarget->supportsAddressTopByteIgnored())
625 setTargetDAGCombine(ISD::LOAD);
626
627 setTargetDAGCombine(ISD::MUL);
628
629 setTargetDAGCombine(ISD::SELECT);
630 setTargetDAGCombine(ISD::VSELECT);
631
632 setTargetDAGCombine(ISD::INTRINSIC_VOID);
633 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
634 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
635
636 setTargetDAGCombine(ISD::GlobalAddress);
637
638 // In case of strict alignment, avoid an excessive number of byte wide stores.
639 MaxStoresPerMemsetOptSize = 8;
640 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
641 ? MaxStoresPerMemsetOptSize : 32;
642
643 MaxGluedStoresPerMemcpy = 4;
644 MaxStoresPerMemcpyOptSize = 4;
645 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
646 ? MaxStoresPerMemcpyOptSize : 16;
647
648 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
649
650 MaxLoadsPerMemcmpOptSize = 4;
651 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
652 ? MaxLoadsPerMemcmpOptSize : 8;
653
654 setStackPointerRegisterToSaveRestore(AArch64::SP);
655
656 setSchedulingPreference(Sched::Hybrid);
657
658 EnableExtLdPromotion = true;
659
660 // Set required alignment.
661 setMinFunctionAlignment(Align(4));
662 // Set preferred alignments.
663 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
664 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
665
666 // Only change the limit for entries in a jump table if specified by
667 // the sub target, but not at the command line.
668 unsigned MaxJT = STI.getMaximumJumpTableSize();
669 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
670 setMaximumJumpTableSize(MaxJT);
671
672 setHasExtractBitsInsn(true);
673
674 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
675
676 if (Subtarget->hasNEON()) {
677 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
678 // silliness like this:
679 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
680 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
681 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
682 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
683 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
684 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
685 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
686 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
687 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
688 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
689 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
690 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
691 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
692 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
693 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
694 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
695 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
696 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
697 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
698 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
699 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
700 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
701 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
702 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
703 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
704
705 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
706 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
707 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
708 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
709 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
710
711 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
712
713 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
714 // elements smaller than i32, so promote the input to i32 first.
715 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
716 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
717 // i8 vector elements also need promotion to i32 for v8i8
718 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
719 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
720 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
721 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
722 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
723 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
724 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
725 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
726 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
727 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
728 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
729
730 if (Subtarget->hasFullFP16()) {
731 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
732 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
733 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
734 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
735 } else {
736 // when AArch64 doesn't have fullfp16 support, promote the input
737 // to i32 first.
738 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
739 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
740 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
741 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
742 }
743
744 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
745 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
746
747 // AArch64 doesn't have MUL.2d:
748 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
749 // Custom handling for some quad-vector types to detect MULL.
750 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
751 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
752 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
753
754 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
755 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
756 // Vector reductions
757 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
758 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
759 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
760 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
761 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
762
763 // Saturates
764 setOperationAction(ISD::SADDSAT, VT, Legal);
765 setOperationAction(ISD::UADDSAT, VT, Legal);
766 setOperationAction(ISD::SSUBSAT, VT, Legal);
767 setOperationAction(ISD::USUBSAT, VT, Legal);
768 }
769 for (MVT VT : { MVT::v4f16, MVT::v2f32,
770 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
771 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
772 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
773 }
774
775 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
776 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
777 // Likewise, narrowing and extending vector loads/stores aren't handled
778 // directly.
779 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
780 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
781
782 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
783 setOperationAction(ISD::MULHS, VT, Legal);
784 setOperationAction(ISD::MULHU, VT, Legal);
785 } else {
786 setOperationAction(ISD::MULHS, VT, Expand);
787 setOperationAction(ISD::MULHU, VT, Expand);
788 }
789 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
790 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
791
792 setOperationAction(ISD::BSWAP, VT, Expand);
793 setOperationAction(ISD::CTTZ, VT, Expand);
794
795 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
796 setTruncStoreAction(VT, InnerVT, Expand);
797 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
798 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
799 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
800 }
801 }
802
803 // AArch64 has implementations of a lot of rounding-like FP operations.
804 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
805 setOperationAction(ISD::FFLOOR, Ty, Legal);
806 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
807 setOperationAction(ISD::FCEIL, Ty, Legal);
808 setOperationAction(ISD::FRINT, Ty, Legal);
809 setOperationAction(ISD::FTRUNC, Ty, Legal);
810 setOperationAction(ISD::FROUND, Ty, Legal);
811 }
812
813 if (Subtarget->hasFullFP16()) {
814 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
815 setOperationAction(ISD::FFLOOR, Ty, Legal);
816 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
817 setOperationAction(ISD::FCEIL, Ty, Legal);
818 setOperationAction(ISD::FRINT, Ty, Legal);
819 setOperationAction(ISD::FTRUNC, Ty, Legal);
820 setOperationAction(ISD::FROUND, Ty, Legal);
821 }
822 }
823
824 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
825 }
826
827 if (Subtarget->hasSVE()) {
828 // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
829 // splat of 0 or undef) once vector selects supported in SVE codegen. See
830 // D68877 for more details.
831 for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
832 if (isTypeLegal(VT))
833 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
834 }
835 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
836 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
837 }
838
839 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
840}
841
842void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
843 assert(VT.isVector() && "VT should be a vector type");
844
845 if (VT.isFloatingPoint()) {
846 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
847 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
848 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
849 }
850
851 // Mark vector float intrinsics as expand.
852 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
853 setOperationAction(ISD::FSIN, VT, Expand);
854 setOperationAction(ISD::FCOS, VT, Expand);
855 setOperationAction(ISD::FPOW, VT, Expand);
856 setOperationAction(ISD::FLOG, VT, Expand);
857 setOperationAction(ISD::FLOG2, VT, Expand);
858 setOperationAction(ISD::FLOG10, VT, Expand);
859 setOperationAction(ISD::FEXP, VT, Expand);
860 setOperationAction(ISD::FEXP2, VT, Expand);
861
862 // But we do support custom-lowering for FCOPYSIGN.
863 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
864 }
865
866 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
867 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
868 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
869 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
870 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
871 setOperationAction(ISD::SRA, VT, Custom);
872 setOperationAction(ISD::SRL, VT, Custom);
873 setOperationAction(ISD::SHL, VT, Custom);
874 setOperationAction(ISD::OR, VT, Custom);
875 setOperationAction(ISD::SETCC, VT, Custom);
876 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
877
878 setOperationAction(ISD::SELECT, VT, Expand);
879 setOperationAction(ISD::SELECT_CC, VT, Expand);
880 setOperationAction(ISD::VSELECT, VT, Expand);
881 for (MVT InnerVT : MVT::all_valuetypes())
882 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
883
884 // CNT supports only B element sizes, then use UADDLP to widen.
885 if (VT != MVT::v8i8 && VT != MVT::v16i8)
886 setOperationAction(ISD::CTPOP, VT, Custom);
887
888 setOperationAction(ISD::UDIV, VT, Expand);
889 setOperationAction(ISD::SDIV, VT, Expand);
890 setOperationAction(ISD::UREM, VT, Expand);
891 setOperationAction(ISD::SREM, VT, Expand);
892 setOperationAction(ISD::FREM, VT, Expand);
893
894 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
895 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
896
897 if (!VT.isFloatingPoint())
898 setOperationAction(ISD::ABS, VT, Legal);
899
900 // [SU][MIN|MAX] are available for all NEON types apart from i64.
901 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
902 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
903 setOperationAction(Opcode, VT, Legal);
904
905 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
906 if (VT.isFloatingPoint() &&
907 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
908 for (unsigned Opcode :
909 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
910 setOperationAction(Opcode, VT, Legal);
911
912 if (Subtarget->isLittleEndian()) {
913 for (unsigned im = (unsigned)ISD::PRE_INC;
914 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
915 setIndexedLoadAction(im, VT, Legal);
916 setIndexedStoreAction(im, VT, Legal);
917 }
918 }
919}
920
921void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
922 addRegisterClass(VT, &AArch64::FPR64RegClass);
923 addTypeForNEON(VT, MVT::v2i32);
924}
925
926void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
927 addRegisterClass(VT, &AArch64::FPR128RegClass);
928 addTypeForNEON(VT, MVT::v4i32);
929}
930
931EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
932 EVT VT) const {
933 if (!VT.isVector())
934 return MVT::i32;
935 return VT.changeVectorElementTypeToInteger();
936}
937
938static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
939 const APInt &Demanded,
940 TargetLowering::TargetLoweringOpt &TLO,
941 unsigned NewOpc) {
942 uint64_t OldImm = Imm, NewImm, Enc;
943 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
944
945 // Return if the immediate is already all zeros, all ones, a bimm32 or a
946 // bimm64.
947 if (Imm == 0 || Imm == Mask ||
948 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
949 return false;
950
951 unsigned EltSize = Size;
952 uint64_t DemandedBits = Demanded.getZExtValue();
953
954 // Clear bits that are not demanded.
955 Imm &= DemandedBits;
956
957 while (true) {
958 // The goal here is to set the non-demanded bits in a way that minimizes
959 // the number of switching between 0 and 1. In order to achieve this goal,
960 // we set the non-demanded bits to the value of the preceding demanded bits.
961 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
962 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
963 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
964 // The final result is 0b11000011.
965 uint64_t NonDemandedBits = ~DemandedBits;
966 uint64_t InvertedImm = ~Imm & DemandedBits;
967 uint64_t RotatedImm =
968 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
969 NonDemandedBits;
970 uint64_t Sum = RotatedImm + NonDemandedBits;
971 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
972 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
973 NewImm = (Imm | Ones) & Mask;
974
975 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
976 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
977 // we halve the element size and continue the search.
978 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
979 break;
980
981 // We cannot shrink the element size any further if it is 2-bits.
982 if (EltSize == 2)
983 return false;
984
985 EltSize /= 2;
986 Mask >>= EltSize;
987 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
988
989 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
990 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
991 return false;
992
993 // Merge the upper and lower halves of Imm and DemandedBits.
994 Imm |= Hi;
995 DemandedBits |= DemandedBitsHi;
996 }
997
998 ++NumOptimizedImms;
999
1000 // Replicate the element across the register width.
1001 while (EltSize < Size) {
1002 NewImm |= NewImm << EltSize;
1003 EltSize *= 2;
1004 }
1005
1006 (void)OldImm;
1007 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1008        "demanded bits should never be altered");
1009 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1010
1011 // Create the new constant immediate node.
1012 EVT VT = Op.getValueType();
1013 SDLoc DL(Op);
1014 SDValue New;
1015
1016 // If the new constant immediate is all-zeros or all-ones, let the target
1017 // independent DAG combine optimize this node.
1018 if (NewImm == 0 || NewImm == OrigMask) {
1019 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1020 TLO.DAG.getConstant(NewImm, DL, VT));
1021 // Otherwise, create a machine node so that target independent DAG combine
1022 // doesn't undo this optimization.
1023 } else {
1024 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1025 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1026 New = SDValue(
1027 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1028 }
1029
1030 return TLO.CombineTo(Op, New);
1031}
1032
1033bool AArch64TargetLowering::targetShrinkDemandedConstant(
1034 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
1035 // Delay this optimization to as late as possible.
1036 if (!TLO.LegalOps)
1037 return false;
1038
1039 if (!EnableOptimizeLogicalImm)
1040 return false;
1041
1042 EVT VT = Op.getValueType();
1043 if (VT.isVector())
1044 return false;
1045
1046 unsigned Size = VT.getSizeInBits();
1047 assert((Size == 32 || Size == 64) &&
1048        "i32 or i64 is expected after legalization.");
1049
1050 // Exit early if we demand all bits.
1051 if (Demanded.countPopulation() == Size)
1052 return false;
1053
1054 unsigned NewOpc;
1055 switch (Op.getOpcode()) {
1056 default:
1057 return false;
1058 case ISD::AND:
1059 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1060 break;
1061 case ISD::OR:
1062 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1063 break;
1064 case ISD::XOR:
1065 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1066 break;
1067 }
1068 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1069 if (!C)
1070 return false;
1071 uint64_t Imm = C->getZExtValue();
1072 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
1073}
1074
1075/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1076/// Mask are known to be either zero or one and return them in Known.
1077void AArch64TargetLowering::computeKnownBitsForTargetNode(
1078 const SDValue Op, KnownBits &Known,
1079 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1080 switch (Op.getOpcode()) {
1081 default:
1082 break;
1083 case AArch64ISD::CSEL: {
1084 KnownBits Known2;
1085 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1086 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1087 Known.Zero &= Known2.Zero;
1088 Known.One &= Known2.One;
1089 break;
1090 }
1091 case AArch64ISD::LOADgot:
1092 case AArch64ISD::ADDlow: {
1093 if (!Subtarget->isTargetILP32())
1094 break;
1095 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1096 Known.Zero = APInt::getHighBitsSet(64, 32);
1097 break;
1098 }
1099 case ISD::INTRINSIC_W_CHAIN: {
1100 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1101 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1102 switch (IntID) {
1103 default: return;
1104 case Intrinsic::aarch64_ldaxr:
1105 case Intrinsic::aarch64_ldxr: {
1106 unsigned BitWidth = Known.getBitWidth();
1107 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1108 unsigned MemBits = VT.getScalarSizeInBits();
1109 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1110 return;
1111 }
1112 }
1113 break;
1114 }
1115 case ISD::INTRINSIC_WO_CHAIN:
1116 case ISD::INTRINSIC_VOID: {
1117 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1118 switch (IntNo) {
1119 default:
1120 break;
1121 case Intrinsic::aarch64_neon_umaxv:
1122 case Intrinsic::aarch64_neon_uminv: {
1123 // Figure out the datatype of the vector operand. The UMINV instruction
1124 // will zero extend the result, so we can mark as known zero all the
1125 // bits larger than the element datatype. 32-bit or larger doesn't need
1126 // this as those are legal types and will be handled by isel directly.
1127 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1128 unsigned BitWidth = Known.getBitWidth();
1129 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1130 assert(BitWidth >= 8 && "Unexpected width!");
1131 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1132 Known.Zero |= Mask;
1133 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1134 assert(BitWidth >= 16 && "Unexpected width!");
1135 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1136 Known.Zero |= Mask;
1137 }
1138 break;
1139 } break;
1140 }
1141 }
1142 }
1143}
1144
1145MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1146 EVT) const {
1147 return MVT::i64;
1148}
1149
1150bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1151 EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1152 bool *Fast) const {
1153 if (Subtarget->requiresStrictAlign())
1154 return false;
1155
1156 if (Fast) {
1157 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1158 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1159 // See comments in performSTORECombine() for more details about
1160 // these conditions.
1161
1162 // Code that uses clang vector extensions can mark that it
1163 // wants unaligned accesses to be treated as fast by
1164 // underspecifying alignment to be 1 or 2.
1165 Align <= 2 ||
1166
1167 // Disregard v2i64. Memcpy lowering produces those and splitting
1168 // them regresses performance on micro-benchmarks and olden/bh.
1169 VT == MVT::v2i64;
1170 }
1171 return true;
1172}
1173
1174// Same as above but handling LLTs instead.
1175bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1176 LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1177 bool *Fast) const {
1178 if (Subtarget->requiresStrictAlign())
1179 return false;
1180
1181 if (Fast) {
1182 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1183 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1184 Ty.getSizeInBytes() != 16 ||
1185 // See comments in performSTORECombine() for more details about
1186 // these conditions.
1187
1188 // Code that uses clang vector extensions can mark that it
1189 // wants unaligned accesses to be treated as fast by
1190 // underspecifying alignment to be 1 or 2.
1191 Align <= 2 ||
1192
1193 // Disregard v2i64. Memcpy lowering produces those and splitting
1194 // them regresses performance on micro-benchmarks and olden/bh.
1195 Ty == LLT::vector(2, 64);
1196 }
1197 return true;
1198}
1199
1200FastISel *
1201AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1202 const TargetLibraryInfo *libInfo) const {
1203 return AArch64::createFastISel(funcInfo, libInfo);
1204}
1205
1206const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1207 switch ((AArch64ISD::NodeType)Opcode) {
1208 case AArch64ISD::FIRST_NUMBER: break;
1209 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1210 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1211 case AArch64ISD::ADR: return "AArch64ISD::ADR";
1212 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1213 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1214 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1215 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1216 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1217 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1218 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1219 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1220 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1221 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1222 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1223 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1224 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1225 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1226 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1227 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1228 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1229 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1230 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1231 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1232 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1233 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1234 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1235 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1236 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1237 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1238 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1239 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1240 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1241 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1242 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1243 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1244 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1245 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1246 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1247 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1248 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1249 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1250 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1251 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1252 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1253 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1254 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1255 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1256 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1257 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1258 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1259 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1260 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1261 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1262 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1263 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1264 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1265 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1266 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1267 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1268 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1269 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1270 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1271 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1272 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1273 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1274 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1275 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1276 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1277 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1278 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1279 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1280 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1281 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1282 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1283 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1284 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1285 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1286 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1287 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1288 case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED";
1289 case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED";
1290 case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED";
1291 case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED";
1292 case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED";
1293 case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED";
1294 case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
1295 case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
1296 case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
1297 case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
1298 case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
1299 case AArch64ISD::REV: return "AArch64ISD::REV";
1300 case AArch64ISD::TBL: return "AArch64ISD::TBL";
1301 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1302 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1303 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1304 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1305 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1306 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1307 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1308 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1309 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1310 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1311 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1312 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1313 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1314 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1315 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1316 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1317 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1318 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1319 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1320 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1321 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1322 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1323 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1324 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1325 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1326 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1327 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1328 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1329 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1330 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1331 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1332 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1333 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1334 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1335 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1336 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1337 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1338 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1339 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1340 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1341 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1342 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1343 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1344 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1345 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1346 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1347 case AArch64ISD::STG: return "AArch64ISD::STG";
1348 case AArch64ISD::STZG: return "AArch64ISD::STZG";
1349 case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
1350 case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
1351 case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI";
1352 case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
1353 case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
1354 case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
1355 case AArch64ISD::INSR: return "AArch64ISD::INSR";
1356 case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
1357 case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
1358 case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
1359 case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW";
1360 case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW";
1361 case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED";
1362 case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED";
1363 case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
1364 case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
1365 case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
1366 case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
1367 case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
1368 case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
1369 case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
1370 case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
1371 case AArch64ISD::SST1: return "AArch64ISD::SST1";
1372 case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
1373 case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
1374 case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
1375 case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
1376 case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
1377 case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
1378 case AArch64ISD::LDP: return "AArch64ISD::LDP";
1379 case AArch64ISD::STP: return "AArch64ISD::STP";
1380 }
1381 return nullptr;
1382}
1383
1384MachineBasicBlock *
1385AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1386 MachineBasicBlock *MBB) const {
1387 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1388 // phi node:
1389
1390 // OrigBB:
1391 // [... previous instrs leading to comparison ...]
1392 // b.ne TrueBB
1393 // b EndBB
1394 // TrueBB:
1395 // ; Fallthrough
1396 // EndBB:
1397 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1398
1399 MachineFunction *MF = MBB->getParent();
1400 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1401 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1402 DebugLoc DL = MI.getDebugLoc();
1403 MachineFunction::iterator It = ++MBB->getIterator();
1404
1405 Register DestReg = MI.getOperand(0).getReg();
1406 Register IfTrueReg = MI.getOperand(1).getReg();
1407 Register IfFalseReg = MI.getOperand(2).getReg();
1408 unsigned CondCode = MI.getOperand(3).getImm();
1409 bool NZCVKilled = MI.getOperand(4).isKill();
1410
1411 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1412 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1413 MF->insert(It, TrueBB);
1414 MF->insert(It, EndBB);
1415
1416 // Transfer rest of current basic-block to EndBB
1417 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1418 MBB->end());
1419 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1420
1421 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1422 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1423 MBB->addSuccessor(TrueBB);
1424 MBB->addSuccessor(EndBB);
1425
1426 // TrueBB falls through to the end.
1427 TrueBB->addSuccessor(EndBB);
1428
1429 if (!NZCVKilled) {
1430 TrueBB->addLiveIn(AArch64::NZCV);
1431 EndBB->addLiveIn(AArch64::NZCV);
1432 }
1433
1434 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1435 .addReg(IfTrueReg)
1436 .addMBB(TrueBB)
1437 .addReg(IfFalseReg)
1438 .addMBB(MBB);
1439
1440 MI.eraseFromParent();
1441 return EndBB;
1442}
1443
1444MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
1445 MachineInstr &MI, MachineBasicBlock *BB) const {
1446 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
1447 BB->getParent()->getFunction().getPersonalityFn())) &&
1448 "SEH does not use catchret!");
1449 return BB;
1450}
1451
1452MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchPad(
1453 MachineInstr &MI, MachineBasicBlock *BB) const {
1454 MI.eraseFromParent();
1455 return BB;
1456}
1457
1458MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1459 MachineInstr &MI, MachineBasicBlock *BB) const {
1460 switch (MI.getOpcode()) {
1461 default:
1462#ifndef NDEBUG
1463 MI.dump();
1464#endif
1465 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1465)
;
1466
1467 case AArch64::F128CSEL:
1468 return EmitF128CSEL(MI, BB);
1469
1470 case TargetOpcode::STACKMAP:
1471 case TargetOpcode::PATCHPOINT:
1472 return emitPatchPoint(MI, BB);
1473
1474 case AArch64::CATCHRET:
1475 return EmitLoweredCatchRet(MI, BB);
1476 case AArch64::CATCHPAD:
1477 return EmitLoweredCatchPad(MI, BB);
1478 }
1479}
1480
1481//===----------------------------------------------------------------------===//
1482// AArch64 Lowering private implementation.
1483//===----------------------------------------------------------------------===//
1484
1485//===----------------------------------------------------------------------===//
1486// Lowering Code
1487//===----------------------------------------------------------------------===//
1488
1489/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1490/// CC
1491static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1492 switch (CC) {
1493 default:
1494 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1494)
;
1495 case ISD::SETNE:
1496 return AArch64CC::NE;
1497 case ISD::SETEQ:
1498 return AArch64CC::EQ;
1499 case ISD::SETGT:
1500 return AArch64CC::GT;
1501 case ISD::SETGE:
1502 return AArch64CC::GE;
1503 case ISD::SETLT:
1504 return AArch64CC::LT;
1505 case ISD::SETLE:
1506 return AArch64CC::LE;
1507 case ISD::SETUGT:
1508 return AArch64CC::HI;
1509 case ISD::SETUGE:
1510 return AArch64CC::HS;
1511 case ISD::SETULT:
1512 return AArch64CC::LO;
1513 case ISD::SETULE:
1514 return AArch64CC::LS;
1515 }
1516}
1517
1518/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1519static void changeFPCCToAArch64CC(ISD::CondCode CC,
1520 AArch64CC::CondCode &CondCode,
1521 AArch64CC::CondCode &CondCode2) {
1522 CondCode2 = AArch64CC::AL;
1523 switch (CC) {
1524 default:
1525 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1525)
;
1526 case ISD::SETEQ:
1527 case ISD::SETOEQ:
1528 CondCode = AArch64CC::EQ;
1529 break;
1530 case ISD::SETGT:
1531 case ISD::SETOGT:
1532 CondCode = AArch64CC::GT;
1533 break;
1534 case ISD::SETGE:
1535 case ISD::SETOGE:
1536 CondCode = AArch64CC::GE;
1537 break;
1538 case ISD::SETOLT:
1539 CondCode = AArch64CC::MI;
1540 break;
1541 case ISD::SETOLE:
1542 CondCode = AArch64CC::LS;
1543 break;
1544 case ISD::SETONE:
1545 CondCode = AArch64CC::MI;
1546 CondCode2 = AArch64CC::GT;
1547 break;
1548 case ISD::SETO:
1549 CondCode = AArch64CC::VC;
1550 break;
1551 case ISD::SETUO:
1552 CondCode = AArch64CC::VS;
1553 break;
1554 case ISD::SETUEQ:
1555 CondCode = AArch64CC::EQ;
1556 CondCode2 = AArch64CC::VS;
1557 break;
1558 case ISD::SETUGT:
1559 CondCode = AArch64CC::HI;
1560 break;
1561 case ISD::SETUGE:
1562 CondCode = AArch64CC::PL;
1563 break;
1564 case ISD::SETLT:
1565 case ISD::SETULT:
1566 CondCode = AArch64CC::LT;
1567 break;
1568 case ISD::SETLE:
1569 case ISD::SETULE:
1570 CondCode = AArch64CC::LE;
1571 break;
1572 case ISD::SETNE:
1573 case ISD::SETUNE:
1574 CondCode = AArch64CC::NE;
1575 break;
1576 }
1577}
1578
1579/// Convert a DAG fp condition code to an AArch64 CC.
1580/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1581/// should be AND'ed instead of OR'ed.
1582static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1583 AArch64CC::CondCode &CondCode,
1584 AArch64CC::CondCode &CondCode2) {
1585 CondCode2 = AArch64CC::AL;
1586 switch (CC) {
1587 default:
1588 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1589 assert(CondCode2 == AArch64CC::AL);
1590 break;
1591 case ISD::SETONE:
1592 // (a one b)
1593 // == ((a olt b) || (a ogt b))
1594 // == ((a ord b) && (a une b))
1595 CondCode = AArch64CC::VC;
1596 CondCode2 = AArch64CC::NE;
1597 break;
1598 case ISD::SETUEQ:
1599 // (a ueq b)
1600 // == ((a uno b) || (a oeq b))
1601 // == ((a ule b) && (a uge b))
1602 CondCode = AArch64CC::PL;
1603 CondCode2 = AArch64CC::LE;
1604 break;
1605 }
1606}
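
The identities quoted in the comments above can be spot-checked with ordinary C++ floating point, NaN included; this is an illustrative self-test, not code from this file.

#include <cassert>
#include <cmath>

int main() {
  const double vals[] = {1.0, 2.0, NAN};
  for (double a : vals)
    for (double b : vals) {
      bool ord = !std::isnan(a) && !std::isnan(b);
      bool uno = !ord;
      // (a one b) == ((a ord b) && (a une b))  -> the VC and NE pair above.
      bool one = ord && a != b;
      bool une = uno || a != b;
      assert(one == (ord && une));
      // (a ueq b) == ((a ule b) && (a uge b))  -> the PL and LE pair above.
      bool ueq = uno || a == b;
      bool ule = uno || a <= b;
      bool uge = uno || a >= b;
      assert(ueq == (ule && uge));
    }
  return 0;
}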
1607
1608/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1609/// CC usable with the vector instructions. Fewer operations are available
1610/// without a real NZCV register, so we have to use less efficient combinations
1611/// to get the same effect.
1612static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1613 AArch64CC::CondCode &CondCode,
1614 AArch64CC::CondCode &CondCode2,
1615 bool &Invert) {
1616 Invert = false;
1617 switch (CC) {
1618 default:
1619 // Mostly the scalar mappings work fine.
1620 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1621 break;
1622 case ISD::SETUO:
1623 Invert = true;
1624 LLVM_FALLTHROUGH;
1625 case ISD::SETO:
1626 CondCode = AArch64CC::MI;
1627 CondCode2 = AArch64CC::GE;
1628 break;
1629 case ISD::SETUEQ:
1630 case ISD::SETULT:
1631 case ISD::SETULE:
1632 case ISD::SETUGT:
1633 case ISD::SETUGE:
1634 // All of the compare-mask comparisons are ordered, but we can switch
1635 // between the two by a double inversion. E.g. ULE == !OGT.
1636 Invert = true;
1637 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
1638 CondCode, CondCode2);
1639 break;
1640 }
1641}
1642
1643static bool isLegalArithImmed(uint64_t C) {
1644 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1645 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1646 LLVM_DEBUG(dbgs() << "Is imm " << C
1647 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1648 return IsLegal;
1649}
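
A few concrete values pushed through the predicate above make the two encodable forms (a 12-bit immediate, or a 12-bit immediate shifted left by 12) visible; the copy below is a standalone restatement for illustration.

#include <cstdint>

bool isLegalArithImmedSketch(uint64_t C) {
  // Same test as above: low 12 bits only, or bits [23:12] only.
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

// isLegalArithImmedSketch(0xFFF)      == true   fits in 12 bits
// isLegalArithImmedSketch(0x1000)     == true   0x1 << 12
// isLegalArithImmedSketch(0x1001)     == false  needs both halves
// isLegalArithImmedSketch(0xFFF000)   == true   0xFFF << 12
// isLegalArithImmedSketch(0x1000000)  == false  bit 24 set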
1650
1651 // Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
1652 // the grounds that "op1 - (-op2) == op1 + op2"? Not always: the C and V flags
1653// can be set differently by this operation. It comes down to whether
1654// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1655// everything is fine. If not then the optimization is wrong. Thus general
1656// comparisons are only valid if op2 != 0.
1657//
1658// So, finally, the only LLVM-native comparisons that don't mention C and V
1659// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1660// the absence of information about op2.
1661static bool isCMN(SDValue Op, ISD::CondCode CC) {
1662 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1663 (CC == ISD::SETEQ || CC == ISD::SETNE);
1664}
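
The safety argument in the comment above rests on the result bits of "op1 - (0 - op2)" and "op1 + op2" being identical modulo 2^32, so N and Z (all that SETEQ/SETNE consult) agree even though C and V may not. A small self-contained check, with illustrative values only:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t vals[] = {0u, 1u, 5u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFBu};
  for (uint32_t op1 : vals)
    for (uint32_t op2 : vals)
      // Unsigned wraparound models the 32-bit register arithmetic.
      assert(op1 - (0u - op2) == op1 + op2);
  return 0;
}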
1665
1666static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1667 const SDLoc &dl, SelectionDAG &DAG) {
1668 EVT VT = LHS.getValueType();
1669 const bool FullFP16 =
1670 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1671
1672 if (VT.isFloatingPoint()) {
1673 assert(VT != MVT::f128);
1674 if (VT == MVT::f16 && !FullFP16) {
1675 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1676 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1677 VT = MVT::f32;
1678 }
1679 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1680 }
1681
1682 // The CMP instruction is just an alias for SUBS, and representing it as
1683 // SUBS means that it's possible to get CSE with subtract operations.
1684 // A later phase can perform the optimization of setting the destination
1685 // register to WZR/XZR if it ends up being unused.
1686 unsigned Opcode = AArch64ISD::SUBS;
1687
1688 if (isCMN(RHS, CC)) {
1689 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
1690 Opcode = AArch64ISD::ADDS;
1691 RHS = RHS.getOperand(1);
1692 } else if (isCMN(LHS, CC)) {
1693 // As we are looking for EQ/NE compares, the operands can be commuted; can
1694 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
1695 Opcode = AArch64ISD::ADDS;
1696 LHS = LHS.getOperand(1);
1697 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1698 !isUnsignedIntSetCC(CC)) {
1699 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1700 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1701 // of the signed comparisons.
1702 Opcode = AArch64ISD::ANDS;
1703 RHS = LHS.getOperand(1);
1704 LHS = LHS.getOperand(0);
1705 }
1706
1707 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1708 .getValue(1);
1709}
1710
1711/// \defgroup AArch64CCMP CMP;CCMP matching
1712///
1713/// These functions deal with the formation of CMP;CCMP;... sequences.
1714/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1715/// a comparison. They set the NZCV flags to a predefined value if their
1716/// predicate is false. This allows to express arbitrary conjunctions, for
1717/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1718/// expressed as:
1719/// cmp A
1720/// ccmp B, inv(CB), CA
1721/// check for CB flags
1722///
1723/// This naturally lets us implement chains of AND operations with SETCC
1724/// operands. And we can even implement some other situations by transforming
1725/// them:
1726/// - We can implement (NEG SETCC) i.e. negating a single comparison by
1727/// negating the flags used in a CCMP/FCCMP operations.
1728/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1729/// by negating the flags we test for afterwards. i.e.
1730/// NEG (CMP CCMP CCCMP ...) can be implemented.
1731/// - Note that we can only ever negate all previously processed results.
1732/// What we can not implement by flipping the flags to test is a negation
1733/// of two sub-trees (because the negation affects all sub-trees emitted so
1734/// far, so the 2nd sub-tree we emit would also affect the first).
1735/// With those tools we can implement some OR operations:
1736/// - (OR (SETCC A) (SETCC B)) can be implemented via:
1737/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1738/// - After transforming OR to NEG/AND combinations we may be able to use NEG
1739/// elimination rules from earlier to implement the whole thing as a
1740/// CCMP/FCCMP chain.
1741///
1742/// As complete example:
1743/// or (or (setCA (cmp A)) (setCB (cmp B)))
1744/// (and (setCC (cmp C)) (setCD (cmp D)))"
1745/// can be reassociated to:
1746/// or (and (setCC (cmp C)) setCD (cmp D))
1747 /// (or (setCA (cmp A)) (setCB (cmp B)))
1748/// can be transformed to:
1749/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1750/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1751/// which can be implemented as:
1752/// cmp C
1753/// ccmp D, inv(CD), CC
1754/// ccmp A, CA, inv(CD)
1755/// ccmp B, CB, inv(CA)
1756/// check for CB flags
1757///
1758/// A counterexample is "or (and A B) (and C D)" which translates to
1759 /// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))); we
1760 /// can only implement one of the inner (not) operations, but not both!
1761/// @{
1762
1763/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1764static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1765 ISD::CondCode CC, SDValue CCOp,
1766 AArch64CC::CondCode Predicate,
1767 AArch64CC::CondCode OutCC,
1768 const SDLoc &DL, SelectionDAG &DAG) {
1769 unsigned Opcode = 0;
1770 const bool FullFP16 =
1771 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1772
1773 if (LHS.getValueType().isFloatingPoint()) {
1774 assert(LHS.getValueType() != MVT::f128);
1775 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1776 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1777 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1778 }
1779 Opcode = AArch64ISD::FCCMP;
1780 } else if (RHS.getOpcode() == ISD::SUB) {
1781 SDValue SubOp0 = RHS.getOperand(0);
1782 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1783 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1784 Opcode = AArch64ISD::CCMN;
1785 RHS = RHS.getOperand(1);
1786 }
1787 }
1788 if (Opcode == 0)
1789 Opcode = AArch64ISD::CCMP;
1790
1791 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1792 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1793 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1794 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1795 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1796}
1797
1798/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
1799/// expressed as a conjunction. See \ref AArch64CCMP.
1800/// \param CanNegate Set to true if we can negate the whole sub-tree just by
1801/// changing the conditions on the SETCC tests.
1802/// (this means we can call emitConjunctionRec() with
1803/// Negate==true on this sub-tree)
1804/// \param MustBeFirst Set to true if this subtree needs to be negated and we
1805/// cannot do the negation naturally. We are required to
1806/// emit the subtree first in this case.
1807 /// \param WillNegate Is true if we are called when the result of this
1808/// subexpression must be negated. This happens when the
1809/// outer expression is an OR. We can use this fact to know
1810/// that we have a double negation (or (or ...) ...) that
1811/// can be implemented for free.
1812static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1813 bool &MustBeFirst, bool WillNegate,
1814 unsigned Depth = 0) {
1815 if (!Val.hasOneUse())
1816 return false;
1817 unsigned Opcode = Val->getOpcode();
1818 if (Opcode == ISD::SETCC) {
1819 if (Val->getOperand(0).getValueType() == MVT::f128)
1820 return false;
1821 CanNegate = true;
1822 MustBeFirst = false;
1823 return true;
1824 }
1825 // Protect against exponential runtime and stack overflow.
1826 if (Depth > 6)
1827 return false;
1828 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1829 bool IsOR = Opcode == ISD::OR;
1830 SDValue O0 = Val->getOperand(0);
1831 SDValue O1 = Val->getOperand(1);
1832 bool CanNegateL;
1833 bool MustBeFirstL;
1834 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
1835 return false;
1836 bool CanNegateR;
1837 bool MustBeFirstR;
1838 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
1839 return false;
1840
1841 if (MustBeFirstL && MustBeFirstR)
1842 return false;
1843
1844 if (IsOR) {
1845 // For an OR expression we need to be able to naturally negate at least
1846 // one side or we cannot do the transformation at all.
1847 if (!CanNegateL && !CanNegateR)
1848 return false;
1849 // If the result of the OR will be negated and we can naturally negate
1850 // the leaves, then this sub-tree as a whole negates naturally.
1851 CanNegate = WillNegate && CanNegateL && CanNegateR;
1852 // If we cannot naturally negate the whole sub-tree, then this must be
1853 // emitted first.
1854 MustBeFirst = !CanNegate;
1855 } else {
1856 assert(Opcode == ISD::AND && "Must be OR or AND");
1857 // We cannot naturally negate an AND operation.
1858 CanNegate = false;
1859 MustBeFirst = MustBeFirstL || MustBeFirstR;
1860 }
1861 return true;
1862 }
1863 return false;
1864}
1865
1866/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1867/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1868 /// Tries to transform the given i1 producing node @p Val to a series of compare
1869 /// and conditional compare operations. @returns an NZCV flags producing node
1870 /// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
1871 /// the transformation was not possible.
1872/// \p Negate is true if we want this sub-tree being negated just by changing
1873/// SETCC conditions.
1874static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
1875 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1876 AArch64CC::CondCode Predicate) {
1877 // We're at a tree leaf, produce a conditional comparison operation.
1878 unsigned Opcode = Val->getOpcode();
1879 if (Opcode == ISD::SETCC) {
1880 SDValue LHS = Val->getOperand(0);
1881 SDValue RHS = Val->getOperand(1);
1882 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1883 bool isInteger = LHS.getValueType().isInteger();
1884 if (Negate)
1885 CC = getSetCCInverse(CC, LHS.getValueType());
1886 SDLoc DL(Val);
1887 // Determine OutCC and handle FP special case.
1888 if (isInteger) {
1889 OutCC = changeIntCCToAArch64CC(CC);
1890 } else {
1891 assert(LHS.getValueType().isFloatingPoint());
1892 AArch64CC::CondCode ExtraCC;
1893 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1894 // Some floating point conditions can't be tested with a single condition
1895 // code. Construct an additional comparison in this case.
1896 if (ExtraCC != AArch64CC::AL) {
1897 SDValue ExtraCmp;
1898 if (!CCOp.getNode())
1899 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1900 else
1901 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1902 ExtraCC, DL, DAG);
1903 CCOp = ExtraCmp;
1904 Predicate = ExtraCC;
1905 }
1906 }
1907
1908 // Produce a normal comparison if we are first in the chain
1909 if (!CCOp)
1910 return emitComparison(LHS, RHS, CC, DL, DAG);
1911 // Otherwise produce a ccmp.
1912 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1913 DAG);
1914 }
1915 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
1916
1917 bool IsOR = Opcode == ISD::OR;
1918
1919 SDValue LHS = Val->getOperand(0);
1920 bool CanNegateL;
1921 bool MustBeFirstL;
1922 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
1923 assert(ValidL && "Valid conjunction/disjunction tree");
1924 (void)ValidL;
1925
1926 SDValue RHS = Val->getOperand(1);
1927 bool CanNegateR;
1928 bool MustBeFirstR;
1929 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
1930 assert(ValidR && "Valid conjunction/disjunction tree");
1931 (void)ValidR;
1932
1933 // Swap sub-tree that must come first to the right side.
1934 if (MustBeFirstL) {
1935 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
1936 std::swap(LHS, RHS);
1937 std::swap(CanNegateL, CanNegateR);
1938 std::swap(MustBeFirstL, MustBeFirstR);
1939 }
1940
1941 bool NegateR;
1942 bool NegateAfterR;
1943 bool NegateL;
1944 bool NegateAfterAll;
1945 if (Opcode == ISD::OR) {
1946 // Swap the sub-tree that we can negate naturally to the left.
1947 if (!CanNegateL) {
1948 assert(CanNegateR && "at least one side must be negatable");
1949 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
1950 assert(!Negate);
1951 std::swap(LHS, RHS);
1952 NegateR = false;
1953 NegateAfterR = true;
1954 } else {
1955 // Negate the left sub-tree if possible, otherwise negate the result.
1956 NegateR = CanNegateR;
1957 NegateAfterR = !CanNegateR;
1958 }
1959 NegateL = true;
1960 NegateAfterAll = !Negate;
1961 } else {
1962 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
1963 assert(!Negate && "Valid conjunction/disjunction tree");
1964
1965 NegateL = false;
1966 NegateR = false;
1967 NegateAfterR = false;
1968 NegateAfterAll = false;
1969 }
1970
1971 // Emit sub-trees.
1972 AArch64CC::CondCode RHSCC;
1973 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
1974 if (NegateAfterR)
1975 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1976 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
1977 if (NegateAfterAll)
1978 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1979 return CmpL;
1980}
1981
1982/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
1983/// In some cases this is even possible with OR operations in the expression.
1984/// See \ref AArch64CCMP.
1985/// \see emitConjunctionRec().
1986static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
1987 AArch64CC::CondCode &OutCC) {
1988 bool DummyCanNegate;
1989 bool DummyMustBeFirst;
1990 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
1991 return SDValue();
1992
1993 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
1994}
1995
1996/// @}
1997
1998/// Returns how profitable it is to fold a comparison's operand's shift and/or
1999/// extension operations.
2000static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2001 auto isSupportedExtend = [&](SDValue V) {
2002 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2003 return true;
2004
2005 if (V.getOpcode() == ISD::AND)
2006 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2007 uint64_t Mask = MaskCst->getZExtValue();
2008 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2009 }
2010
2011 return false;
2012 };
2013
2014 if (!Op.hasOneUse())
2015 return 0;
2016
2017 if (isSupportedExtend(Op))
2018 return 1;
2019
2020 unsigned Opc = Op.getOpcode();
2021 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2022 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2023 uint64_t Shift = ShiftCst->getZExtValue();
2024 if (isSupportedExtend(Op.getOperand(0)))
2025 return (Shift <= 4) ? 2 : 1;
2026 EVT VT = Op.getValueType();
2027 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2028 return 1;
2029 }
2030
2031 return 0;
2032}
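
A hedged restatement of the scoring rules above, written over a plain descriptor instead of SDValues (all names below are hypothetical); it makes concrete that a shift of at most 4 on top of a supported extend is the only pattern worth 2.

#include <cstdint>

struct CmpOperandSketch {
  bool HasOneUse;
  bool IsSupportedExtend;            // SIGN_EXTEND_INREG, or AND with
                                     // 0xFF / 0xFFFF / 0xFFFFFFFF
  bool IsShiftByConstant;            // SHL / SRL / SRA by a constant
  uint64_t ShiftAmount;
  bool ShiftInputIsSupportedExtend;
  unsigned BitWidth;                 // 32 or 64
};

unsigned cmpFoldingProfitSketch(const CmpOperandSketch &Op) {
  if (!Op.HasOneUse)
    return 0;
  if (Op.IsSupportedExtend)
    return 1;                                    // e.g. cmp w0, w1, sxtb
  if (Op.IsShiftByConstant) {
    if (Op.ShiftInputIsSupportedExtend)
      return Op.ShiftAmount <= 4 ? 2 : 1;        // e.g. cmp w0, w1, sxth #3
    if (Op.ShiftAmount < Op.BitWidth)
      return 1;                                  // e.g. cmp w0, w1, lsl #7
  }
  return 0;
}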
2033
2034static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2035 SDValue &AArch64cc, SelectionDAG &DAG,
2036 const SDLoc &dl) {
2037 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2038 EVT VT = RHS.getValueType();
2039 uint64_t C = RHSC->getZExtValue();
2040 if (!isLegalArithImmed(C)) {
2041 // Constant does not fit, try adjusting it by one?
2042 switch (CC) {
2043 default:
2044 break;
2045 case ISD::SETLT:
2046 case ISD::SETGE:
2047 if ((VT == MVT::i32 && C != 0x80000000 &&
2048 isLegalArithImmed((uint32_t)(C - 1))) ||
2049 (VT == MVT::i64 && C != 0x80000000ULL &&
2050 isLegalArithImmed(C - 1ULL))) {
2051 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2052 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2053 RHS = DAG.getConstant(C, dl, VT);
2054 }
2055 break;
2056 case ISD::SETULT:
2057 case ISD::SETUGE:
2058 if ((VT == MVT::i32 && C != 0 &&
2059 isLegalArithImmed((uint32_t)(C - 1))) ||
2060 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2061 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2062 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2063 RHS = DAG.getConstant(C, dl, VT);
2064 }
2065 break;
2066 case ISD::SETLE:
2067 case ISD::SETGT:
2068 if ((VT == MVT::i32 && C != INT32_MAX &&
2069 isLegalArithImmed((uint32_t)(C + 1))) ||
2070 (VT == MVT::i64 && C != INT64_MAX &&
2071 isLegalArithImmed(C + 1ULL))) {
2072 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2073 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2074 RHS = DAG.getConstant(C, dl, VT);
2075 }
2076 break;
2077 case ISD::SETULE:
2078 case ISD::SETUGT:
2079 if ((VT == MVT::i32 && C != UINT32_MAX &&
2080 isLegalArithImmed((uint32_t)(C + 1))) ||
2081 (VT == MVT::i64 && C != UINT64_MAX &&
2082 isLegalArithImmed(C + 1ULL))) {
2083 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2084 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2085 RHS = DAG.getConstant(C, dl, VT);
2086 }
2087 break;
2088 }
2089 }
2090 }
2091
2092 // Comparisons are canonicalized so that the RHS operand is simpler than the
2093 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2094 // can fold some shift+extend operations on the RHS operand, so swap the
2095 // operands if that can be done.
2096 //
2097 // For example:
2098 // lsl w13, w11, #1
2099 // cmp w13, w12
2100 // can be turned into:
2101 // cmp w12, w11, lsl #1
2102 if (!isa<ConstantSDNode>(RHS) ||
2103 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2104 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2105
2106 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2107 std::swap(LHS, RHS);
2108 CC = ISD::getSetCCSwappedOperands(CC);
2109 }
2110 }
2111
2112 SDValue Cmp;
2113 AArch64CC::CondCode AArch64CC;
2114 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2115 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2116
2117 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2118 // For the i8 operand, the largest immediate is 255, so this can be easily
2119 // encoded in the compare instruction. For the i16 operand, however, the
2120 // largest immediate cannot be encoded in the compare.
2121 // Therefore, use a sign extending load and cmn to avoid materializing the
2122 // -1 constant. For example,
2123 // movz w1, #65535
2124 // ldrh w0, [x0, #0]
2125 // cmp w0, w1
2126 // >
2127 // ldrsh w0, [x0, #0]
2128 // cmn w0, #1
2129 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2130 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2131 // ensure both the LHS and RHS are truly zero extended and to make sure the
2132 // transformation is profitable.
2133 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2134 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2135 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2136 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2137 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2138 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2139 SDValue SExt =
2140 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2141 DAG.getValueType(MVT::i16));
2142 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2143 RHS.getValueType()),
2144 CC, dl, DAG);
2145 AArch64CC = changeIntCCToAArch64CC(CC);
2146 }
2147 }
2148
2149 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2150 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2151 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2152 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2153 }
2154 }
2155 }
2156
2157 if (!Cmp) {
2158 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2159 AArch64CC = changeIntCCToAArch64CC(CC);
2160 }
2161 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2162 return Cmp;
2163}
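
The immediate-adjustment branch near the top of the function is easiest to see on numbers. 0x1001 is not an encodable arithmetic immediate, but "x < 0x1001" is the same unsigned predicate as "x <= 0x1000", and 0x1000 is encodable; a quick illustrative check:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t xs[] = {0u, 0xFFFu, 0x1000u, 0x1001u, 0x2000u};
  for (uint32_t x : xs)
    // SETULT 0x1001 (illegal imm)  ==  SETULE 0x1000 (legal imm, 0x1 << 12).
    assert((x < 0x1001u) == (x <= 0x1000u));
  return 0;
}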
2164
2165static std::pair<SDValue, SDValue>
2166getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2167 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2168 "Unsupported value type");
2169 SDValue Value, Overflow;
2170 SDLoc DL(Op);
2171 SDValue LHS = Op.getOperand(0);
2172 SDValue RHS = Op.getOperand(1);
2173 unsigned Opc = 0;
2174 switch (Op.getOpcode()) {
2175 default:
2176 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2176)
;
2177 case ISD::SADDO:
2178 Opc = AArch64ISD::ADDS;
2179 CC = AArch64CC::VS;
2180 break;
2181 case ISD::UADDO:
2182 Opc = AArch64ISD::ADDS;
2183 CC = AArch64CC::HS;
2184 break;
2185 case ISD::SSUBO:
2186 Opc = AArch64ISD::SUBS;
2187 CC = AArch64CC::VS;
2188 break;
2189 case ISD::USUBO:
2190 Opc = AArch64ISD::SUBS;
2191 CC = AArch64CC::LO;
2192 break;
2193 // Multiply needs a little bit of extra work.
2194 case ISD::SMULO:
2195 case ISD::UMULO: {
2196 CC = AArch64CC::NE;
2197 bool IsSigned = Op.getOpcode() == ISD::SMULO;
2198 if (Op.getValueType() == MVT::i32) {
2199 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2200 // For a 32 bit multiply with overflow check we want the instruction
2201 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2202 // need to generate the following pattern:
2203 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2204 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2205 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2206 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2207 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2208 DAG.getConstant(0, DL, MVT::i64));
2209 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2210 // operation. We need to clear out the upper 32 bits, because we used a
2211 // widening multiply that wrote all 64 bits. In the end this should be a
2212 // noop.
2213 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2214 if (IsSigned) {
2215 // The signed overflow check requires more than just a simple check for
2216 // any bit set in the upper 32 bits of the result. These bits could be
2217 // just the sign bits of a negative number. To perform the overflow
2218 // check we have to arithmetic shift right the 32nd bit of the result by
2219 // 31 bits. Then we compare the result to the upper 32 bits.
2220 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2221 DAG.getConstant(32, DL, MVT::i64));
2222 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2223 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2224 DAG.getConstant(31, DL, MVT::i64));
2225 // It is important that LowerBits is last, otherwise the arithmetic
2226 // shift will not be folded into the compare (SUBS).
2227 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2228 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2229 .getValue(1);
2230 } else {
2231 // The overflow check for unsigned multiply is easy. We only need to
2232 // check if any of the upper 32 bits are set. This can be done with a
2233 // CMP (shifted register). For that we need to generate the following
2234 // pattern:
2235 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2236 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2237 DAG.getConstant(32, DL, MVT::i64));
2238 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2239 Overflow =
2240 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2241 DAG.getConstant(0, DL, MVT::i64),
2242 UpperBits).getValue(1);
2243 }
2244 break;
2245 }
2246 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2247 // For the 64 bit multiply
2248 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2249 if (IsSigned) {
2250 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2251 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2252 DAG.getConstant(63, DL, MVT::i64));
2253 // It is important that LowerBits is last, otherwise the arithmetic
2254 // shift will not be folded into the compare (SUBS).
2255 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2256 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2257 .getValue(1);
2258 } else {
2259 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2260 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2261 Overflow =
2262 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2263 DAG.getConstant(0, DL, MVT::i64),
2264 UpperBits).getValue(1);
2265 }
2266 break;
2267 }
2268 } // switch (...)
2269
2270 if (Opc) {
2271 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2272
2273 // Emit the AArch64 operation with overflow check.
2274 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2275 Overflow = Value.getValue(1);
2276 }
2277 return std::make_pair(Value, Overflow);
2278}
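
For the 32-bit unsigned case the overflow test reduces to "widen, multiply, and look at the upper 32 bits", as the comments above describe; a tiny illustrative check (values are arbitrary):

#include <cassert>
#include <cstdint>

int main() {
  // Widening multiply (what UMADDL computes), then the upper-bits compare
  // that the SUBS against (srl Mul, 32) performs.
  uint32_t a = 0xFFFFu, b = 0x10000u;
  assert((uint64_t)a * b >> 32 == 0);      // 0xFFFF0000: no overflow
  a = 0x10000u;
  assert((uint64_t)a * b >> 32 != 0);      // 0x100000000: overflow
  return 0;
}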
2279
2280SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2281 RTLIB::Libcall Call) const {
2282 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2283 MakeLibCallOptions CallOptions;
2284 return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
2285}
2286
2287// Returns true if the given Op is the overflow flag result of an overflow
2288// intrinsic operation.
2289static bool isOverflowIntrOpRes(SDValue Op) {
2290 unsigned Opc = Op.getOpcode();
2291 return (Op.getResNo() == 1 &&
2292 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2293 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2294}
2295
2296static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2297 SDValue Sel = Op.getOperand(0);
2298 SDValue Other = Op.getOperand(1);
2299 SDLoc dl(Sel);
2300
2301 // If the operand is an overflow checking operation, invert the condition
2302 // code and kill the Not operation. I.e., transform:
2303 // (xor (overflow_op_bool, 1))
2304 // -->
2305 // (csel 1, 0, invert(cc), overflow_op_bool)
2306 // ... which later gets transformed to just a cset instruction with an
2307 // inverted condition code, rather than a cset + eor sequence.
2308 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
2309 // Only lower legal XALUO ops.
2310 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2311 return SDValue();
2312
2313 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2314 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2315 AArch64CC::CondCode CC;
2316 SDValue Value, Overflow;
2317 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2318 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2319 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2320 CCVal, Overflow);
2321 }
2322 // If neither operand is a SELECT_CC, give up.
2323 if (Sel.getOpcode() != ISD::SELECT_CC)
2324 std::swap(Sel, Other);
2325 if (Sel.getOpcode() != ISD::SELECT_CC)
2326 return Op;
2327
2328 // The folding we want to perform is:
2329 // (xor x, (select_cc a, b, cc, 0, -1) )
2330 // -->
2331 // (csel x, (xor x, -1), cc ...)
2332 //
2333 // The latter will get matched to a CSINV instruction.
2334
2335 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2336 SDValue LHS = Sel.getOperand(0);
2337 SDValue RHS = Sel.getOperand(1);
2338 SDValue TVal = Sel.getOperand(2);
2339 SDValue FVal = Sel.getOperand(3);
2340
2341 // FIXME: This could be generalized to non-integer comparisons.
2342 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2343 return Op;
2344
2345 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2346 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2347
2348 // The values aren't constants, this isn't the pattern we're looking for.
2349 if (!CFVal || !CTVal)
2350 return Op;
2351
2352 // We can commute the SELECT_CC by inverting the condition. This
2353 // might be needed to make this fit into a CSINV pattern.
2354 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2355 std::swap(TVal, FVal);
2356 std::swap(CTVal, CFVal);
2357 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2358 }
2359
2360 // If the constants line up, perform the transform!
2361 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2362 SDValue CCVal;
2363 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2364
2365 FVal = Other;
2366 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2367 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2368
2369 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2370 CCVal, Cmp);
2371 }
2372
2373 return Op;
2374}
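
The CSINV fold at the end of this function is just "XOR with an all-ones/zero select equals selecting between x and ~x"; an illustrative check with arbitrary values, where cond stands for the select_cc condition:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t xs[] = {0u, 0x1234u, 0xFFFFFFFFu};
  for (uint32_t x : xs)
    for (int cond = 0; cond <= 1; ++cond) {
      uint32_t mask = cond ? 0u : 0xFFFFFFFFu;  // (select_cc a, b, cc, 0, -1)
      uint32_t viaXor = x ^ mask;               // original (xor x, select_cc)
      uint32_t viaSel = cond ? x : ~x;          // (csel x, (xor x, -1), cc ...)
      assert(viaXor == viaSel);
    }
  return 0;
}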
2375
2376static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2377 EVT VT = Op.getValueType();
2378
2379 // Let legalize expand this if it isn't a legal type yet.
2380 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2381 return SDValue();
2382
2383 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2384
2385 unsigned Opc;
2386 bool ExtraOp = false;
2387 switch (Op.getOpcode()) {
2388 default:
2389 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2389)
;
2390 case ISD::ADDC:
2391 Opc = AArch64ISD::ADDS;
2392 break;
2393 case ISD::SUBC:
2394 Opc = AArch64ISD::SUBS;
2395 break;
2396 case ISD::ADDE:
2397 Opc = AArch64ISD::ADCS;
2398 ExtraOp = true;
2399 break;
2400 case ISD::SUBE:
2401 Opc = AArch64ISD::SBCS;
2402 ExtraOp = true;
2403 break;
2404 }
2405
2406 if (!ExtraOp)
2407 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2408 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2409 Op.getOperand(2));
2410}
2411
2412static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2413 // Let legalize expand this if it isn't a legal type yet.
2414 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2415 return SDValue();
2416
2417 SDLoc dl(Op);
2418 AArch64CC::CondCode CC;
2419 // The actual operation that sets the overflow or carry flag.
2420 SDValue Value, Overflow;
2421 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2422
2423 // We use 0 and 1 as false and true values.
2424 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2425 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2426
2427 // We use an inverted condition, because the conditional select is inverted
2428 // too. This will allow it to be selected to a single instruction:
2429 // CSINC Wd, WZR, WZR, invert(cond).
2430 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2431 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2432 CCVal, Overflow);
2433
2434 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2435 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2436}
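// A minimal standalone sketch (not from this file) of why the inverted
// condition above maps onto CSINC Wd, WZR, WZR, invert(cond): CSINC yields
// (cond ? Wn : Wm + 1), so with both operands WZR and the condition inverted
// the result is the 0/1 overflow value produced by the CSEL node built above.
static unsigned modelCSINC(unsigned Wn, unsigned Wm, bool Cond) {
  return Cond ? Wn : Wm + 1; // architectural definition of CSINC
}
// overflow    (cond true,  inverted == false): modelCSINC(0, 0, false) == 1
// no overflow (cond false, inverted == true):  modelCSINC(0, 0, true)  == 0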
2437
2438// Prefetch operands are:
2439// 1: Address to prefetch
2440// 2: bool isWrite
2441// 3: int locality (0 = no locality ... 3 = extreme locality)
2442// 4: bool isDataCache
2443static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2444 SDLoc DL(Op);
2445 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2446 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2447 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2448
2449 bool IsStream = !Locality;
2450 // When the locality number is set
2451 if (Locality) {
2452 // The front-end should have filtered out the out-of-range values
2453     assert(Locality <= 3 && "Prefetch locality out-of-range");
2454     // The locality degree is the inverse of the target cache level:
2455     // higher locality means a closer (faster) cache, so flip the number,
2456     // since the encoding starts at 0 for level 1.
2457 Locality = 3 - Locality;
2458 }
2459
2460   // Build the mask value encoding the expected behavior.
2461 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2462 (!IsData << 3) | // IsDataCache bit
2463 (Locality << 1) | // Cache level bits
2464 (unsigned)IsStream; // Stream bit
2465 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2466 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2467}
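// A minimal standalone sketch (not from this file) of the PRFM immediate built
// above, assuming the same bit layout: IsWrite<<4 | !IsData<<3 | Level<<1 |
// IsStream, where the cache level is re-encoded so that L1 is 0.
static unsigned encodePrfOp(bool IsWrite, unsigned Locality, bool IsData) {
  bool IsStream = Locality == 0;                // locality 0 means streaming
  unsigned Level = Locality ? 3 - Locality : 0; // invert: locality 3 -> L1 (0)
  return (IsWrite << 4) | (!IsData << 3) | (Level << 1) | (unsigned)IsStream;
}
// encodePrfOp(true, 3, true)  == 0b10000 (a PSTL1KEEP-style hint)
// encodePrfOp(false, 0, true) == 0b00001 (a PLDL1STRM-style hint)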
2468
2469SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2470 SelectionDAG &DAG) const {
2471   assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2472
2473 RTLIB::Libcall LC;
2474 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2475
2476 return LowerF128Call(Op, DAG, LC);
2477}
2478
2479SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2480 SelectionDAG &DAG) const {
2481 if (Op.getOperand(0).getValueType() != MVT::f128) {
2482 // It's legal except when f128 is involved
2483 return Op;
2484 }
2485
2486 RTLIB::Libcall LC;
2487 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2488
2489 // FP_ROUND node has a second operand indicating whether it is known to be
2490 // precise. That doesn't take part in the LibCall so we can't directly use
2491 // LowerF128Call.
2492 SDValue SrcVal = Op.getOperand(0);
2493 MakeLibCallOptions CallOptions;
2494 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, CallOptions,
2495 SDLoc(Op)).first;
2496}
2497
2498SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
2499 SelectionDAG &DAG) const {
2500 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2501 // Any additional optimization in this function should be recorded
2502 // in the cost tables.
2503 EVT InVT = Op.getOperand(0).getValueType();
2504 EVT VT = Op.getValueType();
2505 unsigned NumElts = InVT.getVectorNumElements();
2506
2507 // f16 conversions are promoted to f32 when full fp16 is not supported.
2508 if (InVT.getVectorElementType() == MVT::f16 &&
2509 !Subtarget->hasFullFP16()) {
2510 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2511 SDLoc dl(Op);
2512 return DAG.getNode(
2513 Op.getOpcode(), dl, Op.getValueType(),
2514 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2515 }
2516
2517 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2518 SDLoc dl(Op);
2519 SDValue Cv =
2520 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2521 Op.getOperand(0));
2522 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2523 }
2524
2525 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2526 SDLoc dl(Op);
2527 MVT ExtVT =
2528 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2529 VT.getVectorNumElements());
2530 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2531 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2532 }
2533
2534 // Type changing conversions are illegal.
2535 return Op;
2536}
2537
2538SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2539 SelectionDAG &DAG) const {
2540 if (Op.getOperand(0).getValueType().isVector())
2541 return LowerVectorFP_TO_INT(Op, DAG);
2542
2543 // f16 conversions are promoted to f32 when full fp16 is not supported.
2544 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2545 !Subtarget->hasFullFP16()) {
2546 SDLoc dl(Op);
2547 return DAG.getNode(
2548 Op.getOpcode(), dl, Op.getValueType(),
2549 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2550 }
2551
2552 if (Op.getOperand(0).getValueType() != MVT::f128) {
2553 // It's legal except when f128 is involved
2554 return Op;
2555 }
2556
2557 RTLIB::Libcall LC;
2558 if (Op.getOpcode() == ISD::FP_TO_SINT)
2559 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2560 else
2561 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2562
2563 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2564 MakeLibCallOptions CallOptions;
2565 return makeLibCall(DAG, LC, Op.getValueType(), Ops, CallOptions, SDLoc(Op)).first;
2566}
2567
2568static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2569 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2570 // Any additional optimization in this function should be recorded
2571 // in the cost tables.
2572 EVT VT = Op.getValueType();
2573 SDLoc dl(Op);
2574 SDValue In = Op.getOperand(0);
2575 EVT InVT = In.getValueType();
2576
2577 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2578 MVT CastVT =
2579 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2580 InVT.getVectorNumElements());
2581 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2582 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2583 }
2584
2585 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2586 unsigned CastOpc =
2587 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2588 EVT CastVT = VT.changeVectorElementTypeToInteger();
2589 In = DAG.getNode(CastOpc, dl, CastVT, In);
2590 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2591 }
2592
2593 return Op;
2594}
2595
2596SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2597 SelectionDAG &DAG) const {
2598 if (Op.getValueType().isVector())
2599 return LowerVectorINT_TO_FP(Op, DAG);
2600
2601 // f16 conversions are promoted to f32 when full fp16 is not supported.
2602 if (Op.getValueType() == MVT::f16 &&
2603 !Subtarget->hasFullFP16()) {
2604 SDLoc dl(Op);
2605 return DAG.getNode(
2606 ISD::FP_ROUND, dl, MVT::f16,
2607 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2608 DAG.getIntPtrConstant(0, dl));
2609 }
2610
2611 // i128 conversions are libcalls.
2612 if (Op.getOperand(0).getValueType() == MVT::i128)
2613 return SDValue();
2614
2615 // Other conversions are legal, unless it's to the completely software-based
2616 // fp128.
2617 if (Op.getValueType() != MVT::f128)
2618 return Op;
2619
2620 RTLIB::Libcall LC;
2621 if (Op.getOpcode() == ISD::SINT_TO_FP)
2622 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2623 else
2624 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2625
2626 return LowerF128Call(Op, DAG, LC);
2627}
2628
2629SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2630 SelectionDAG &DAG) const {
2631 // For iOS, we want to call an alternative entry point: __sincos_stret,
2632 // which returns the values in two S / D registers.
2633 SDLoc dl(Op);
2634 SDValue Arg = Op.getOperand(0);
2635 EVT ArgVT = Arg.getValueType();
2636 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2637
2638 ArgListTy Args;
2639 ArgListEntry Entry;
2640
2641 Entry.Node = Arg;
2642 Entry.Ty = ArgTy;
2643 Entry.IsSExt = false;
2644 Entry.IsZExt = false;
2645 Args.push_back(Entry);
2646
2647 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2648 : RTLIB::SINCOS_STRET_F32;
2649 const char *LibcallName = getLibcallName(LC);
2650 SDValue Callee =
2651 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2652
2653 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2654 TargetLowering::CallLoweringInfo CLI(DAG);
2655 CLI.setDebugLoc(dl)
2656 .setChain(DAG.getEntryNode())
2657 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2658
2659 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2660 return CallResult.first;
2661}
2662
2663static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2664 if (Op.getValueType() != MVT::f16)
2665 return SDValue();
2666
2667   assert(Op.getOperand(0).getValueType() == MVT::i16);
2668 SDLoc DL(Op);
2669
2670 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2671 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2672 return SDValue(
2673 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2674 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2675 0);
2676}
2677
2678static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2679 if (OrigVT.getSizeInBits() >= 64)
2680 return OrigVT;
2681
2682   assert(OrigVT.isSimple() && "Expecting a simple value type");
2683
2684 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2685 switch (OrigSimpleTy) {
2686   default: llvm_unreachable("Unexpected Vector Type");
2687 case MVT::v2i8:
2688 case MVT::v2i16:
2689 return MVT::v2i32;
2690 case MVT::v4i8:
2691 return MVT::v4i16;
2692 }
2693}
2694
2695static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2696 const EVT &OrigTy,
2697 const EVT &ExtTy,
2698 unsigned ExtOpcode) {
2699 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2700 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2701 // 64-bits we need to insert a new extension so that it will be 64-bits.
2702   assert(ExtTy.is128BitVector() && "Unexpected extension size");
2703 if (OrigTy.getSizeInBits() >= 64)
2704 return N;
2705
2706 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2707 EVT NewVT = getExtensionTo64Bits(OrigTy);
2708
2709 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2710}
2711
2712static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2713 bool isSigned) {
2714 EVT VT = N->getValueType(0);
2715
2716 if (N->getOpcode() != ISD::BUILD_VECTOR)
2717 return false;
2718
2719 for (const SDValue &Elt : N->op_values()) {
2720 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2721 unsigned EltSize = VT.getScalarSizeInBits();
2722 unsigned HalfSize = EltSize / 2;
2723 if (isSigned) {
2724 if (!isIntN(HalfSize, C->getSExtValue()))
2725 return false;
2726 } else {
2727 if (!isUIntN(HalfSize, C->getZExtValue()))
2728 return false;
2729 }
2730 continue;
2731 }
2732 return false;
2733 }
2734
2735 return true;
2736}
2737
2738static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2739 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2740 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2741 N->getOperand(0)->getValueType(0),
2742 N->getValueType(0),
2743 N->getOpcode());
2744
2745   assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2746 EVT VT = N->getValueType(0);
2747 SDLoc dl(N);
2748 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2749 unsigned NumElts = VT.getVectorNumElements();
2750 MVT TruncVT = MVT::getIntegerVT(EltSize);
2751 SmallVector<SDValue, 8> Ops;
2752 for (unsigned i = 0; i != NumElts; ++i) {
2753 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2754 const APInt &CInt = C->getAPIntValue();
2755 // Element types smaller than 32 bits are not legal, so use i32 elements.
2756 // The values are implicitly truncated so sext vs. zext doesn't matter.
2757 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2758 }
2759 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2760}
2761
2762static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2763 return N->getOpcode() == ISD::SIGN_EXTEND ||
2764 isExtendedBUILD_VECTOR(N, DAG, true);
2765}
2766
2767static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2768 return N->getOpcode() == ISD::ZERO_EXTEND ||
2769 isExtendedBUILD_VECTOR(N, DAG, false);
2770}
2771
2772static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2773 unsigned Opcode = N->getOpcode();
2774 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2775 SDNode *N0 = N->getOperand(0).getNode();
2776 SDNode *N1 = N->getOperand(1).getNode();
2777 return N0->hasOneUse() && N1->hasOneUse() &&
2778 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2779 }
2780 return false;
2781}
2782
2783static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2784 unsigned Opcode = N->getOpcode();
2785 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2786 SDNode *N0 = N->getOperand(0).getNode();
2787 SDNode *N1 = N->getOperand(1).getNode();
2788 return N0->hasOneUse() && N1->hasOneUse() &&
2789 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2790 }
2791 return false;
2792}
2793
2794SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2795 SelectionDAG &DAG) const {
2796   // The rounding mode is in bits 23:22 of the FPCR.
2797   // The AArch64 rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0,
2798   // and the formula we use to implement it is ((FPCR + (1 << 22)) >> 22) & 3,
2799   // so that the shift and the AND get folded into a bitfield extract.
2800 SDLoc dl(Op);
2801
2802 SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
2803 DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
2804 MVT::i64));
2805 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2806 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2807 DAG.getConstant(1U << 22, dl, MVT::i32));
2808 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2809 DAG.getConstant(22, dl, MVT::i32));
2810 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2811 DAG.getConstant(3, dl, MVT::i32));
2812}
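// A minimal standalone sketch (not from this file) of the bitfield arithmetic
// above: once the rounding-mode field in FPCR bits 23:22 is isolated, the
// add/shift/mask sequence reduces to ((RM + 1) & 3), which realizes the
// 0->1, 1->2, 2->3, 3->0 mapping required for FLT_ROUNDS.
static unsigned fltRoundsFromFPCR(unsigned FPCR) {
  return ((FPCR + (1u << 22)) >> 22) & 3;
}
// RN (RM = 0): fltRoundsFromFPCR(0u << 22) == 1 (to nearest)
// RZ (RM = 3): fltRoundsFromFPCR(3u << 22) == 0 (toward zero)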
2813
2814static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2815 // Multiplications are only custom-lowered for 128-bit vectors so that
2816 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2817 EVT VT = Op.getValueType();
2818   assert(VT.is128BitVector() && VT.isInteger() &&
2819          "unexpected type for custom-lowering ISD::MUL");
2820 SDNode *N0 = Op.getOperand(0).getNode();
2821 SDNode *N1 = Op.getOperand(1).getNode();
2822 unsigned NewOpc = 0;
2823 bool isMLA = false;
2824 bool isN0SExt = isSignExtended(N0, DAG);
2825 bool isN1SExt = isSignExtended(N1, DAG);
2826 if (isN0SExt && isN1SExt)
2827 NewOpc = AArch64ISD::SMULL;
2828 else {
2829 bool isN0ZExt = isZeroExtended(N0, DAG);
2830 bool isN1ZExt = isZeroExtended(N1, DAG);
2831 if (isN0ZExt && isN1ZExt)
2832 NewOpc = AArch64ISD::UMULL;
2833 else if (isN1SExt || isN1ZExt) {
2834 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2835 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2836 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2837 NewOpc = AArch64ISD::SMULL;
2838 isMLA = true;
2839 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2840 NewOpc = AArch64ISD::UMULL;
2841 isMLA = true;
2842 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2843 std::swap(N0, N1);
2844 NewOpc = AArch64ISD::UMULL;
2845 isMLA = true;
2846 }
2847 }
2848
2849 if (!NewOpc) {
2850 if (VT == MVT::v2i64)
2851 // Fall through to expand this. It is not legal.
2852 return SDValue();
2853 else
2854 // Other vector multiplications are legal.
2855 return Op;
2856 }
2857 }
2858
2859 // Legalize to a S/UMULL instruction
2860 SDLoc DL(Op);
2861 SDValue Op0;
2862 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2863 if (!isMLA) {
2864 Op0 = skipExtensionForVectorMULL(N0, DAG);
2865     assert(Op0.getValueType().is64BitVector() &&
2866            Op1.getValueType().is64BitVector() &&
2867            "unexpected types for extended operands to VMULL");
2868 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2869 }
2870   // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2871   // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
2872   // This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
2873 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2874 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2875 EVT Op1VT = Op1.getValueType();
2876 return DAG.getNode(N0->getOpcode(), DL, VT,
2877 DAG.getNode(NewOpc, DL, VT,
2878 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2879 DAG.getNode(NewOpc, DL, VT,
2880 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2881}
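// A minimal standalone sketch (not from this file) of the distribution used on
// the isMLA path above: because the add in (s/zext A + s/zext B) * (s/zext C)
// is performed in the wide type, it distributes exactly, so the product can be
// rewritten as two widening multiplies (S/UMULL) feeding an accumulate.
static bool distributionHolds(unsigned A, unsigned B, unsigned C) {
  unsigned long long WA = A, WB = B, WC = C; // model the zero-extends
  return (WA + WB) * WC == WA * WC + WB * WC; // always true
}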
2882
2883SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2884 SelectionDAG &DAG) const {
2885 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2886 SDLoc dl(Op);
2887 switch (IntNo) {
2888 default: return SDValue(); // Don't custom lower most intrinsics.
2889 case Intrinsic::thread_pointer: {
2890 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2891 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2892 }
2893 case Intrinsic::aarch64_neon_abs: {
2894 EVT Ty = Op.getValueType();
2895 if (Ty == MVT::i64) {
2896 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
2897 Op.getOperand(1));
2898 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
2899 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
2900 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
2901 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
2902 } else {
2903       report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
2904 }
2905 }
2906 case Intrinsic::aarch64_neon_smax:
2907 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2908 Op.getOperand(1), Op.getOperand(2));
2909 case Intrinsic::aarch64_neon_umax:
2910 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2911 Op.getOperand(1), Op.getOperand(2));
2912 case Intrinsic::aarch64_neon_smin:
2913 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2914 Op.getOperand(1), Op.getOperand(2));
2915 case Intrinsic::aarch64_neon_umin:
2916 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2917 Op.getOperand(1), Op.getOperand(2));
2918
2919 case Intrinsic::aarch64_sve_sunpkhi:
2920 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
2921 Op.getOperand(1));
2922 case Intrinsic::aarch64_sve_sunpklo:
2923 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
2924 Op.getOperand(1));
2925 case Intrinsic::aarch64_sve_uunpkhi:
2926 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
2927 Op.getOperand(1));
2928 case Intrinsic::aarch64_sve_uunpklo:
2929 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
2930 Op.getOperand(1));
2931 case Intrinsic::aarch64_sve_clasta_n:
2932 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
2933 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
2934 case Intrinsic::aarch64_sve_clastb_n:
2935 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
2936 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
2937 case Intrinsic::aarch64_sve_lasta:
2938 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
2939 Op.getOperand(1), Op.getOperand(2));
2940 case Intrinsic::aarch64_sve_lastb:
2941 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
2942 Op.getOperand(1), Op.getOperand(2));
2943 case Intrinsic::aarch64_sve_rev:
2944 return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
2945 Op.getOperand(1));
2946 case Intrinsic::aarch64_sve_tbl:
2947 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
2948 Op.getOperand(1), Op.getOperand(2));
2949 case Intrinsic::aarch64_sve_trn1:
2950 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
2951 Op.getOperand(1), Op.getOperand(2));
2952 case Intrinsic::aarch64_sve_trn2:
2953 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
2954 Op.getOperand(1), Op.getOperand(2));
2955 case Intrinsic::aarch64_sve_uzp1:
2956 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
2957 Op.getOperand(1), Op.getOperand(2));
2958 case Intrinsic::aarch64_sve_uzp2:
2959 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
2960 Op.getOperand(1), Op.getOperand(2));
2961 case Intrinsic::aarch64_sve_zip1:
2962 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
2963 Op.getOperand(1), Op.getOperand(2));
2964 case Intrinsic::aarch64_sve_zip2:
2965 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
2966 Op.getOperand(1), Op.getOperand(2));
2967 case Intrinsic::aarch64_sve_ptrue:
2968 return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
2969 Op.getOperand(1));
2970
2971 case Intrinsic::aarch64_sve_insr: {
2972 SDValue Scalar = Op.getOperand(2);
2973 EVT ScalarTy = Scalar.getValueType();
2974 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
2975 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
2976
2977 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
2978 Op.getOperand(1), Scalar);
2979 }
2980
2981 case Intrinsic::localaddress: {
2982 const auto &MF = DAG.getMachineFunction();
2983 const auto *RegInfo = Subtarget->getRegisterInfo();
2984 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
2985 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
2986 Op.getSimpleValueType());
2987 }
2988
2989 case Intrinsic::eh_recoverfp: {
2990 // FIXME: This needs to be implemented to correctly handle highly aligned
2991 // stack objects. For now we simply return the incoming FP. Refer D53541
2992 // for more details.
2993 SDValue FnOp = Op.getOperand(1);
2994 SDValue IncomingFPOp = Op.getOperand(2);
2995 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
2996 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
2997 if (!Fn)
2998 report_fatal_error(
2999 "llvm.eh.recoverfp must take a function as the first argument");
3000 return IncomingFPOp;
3001 }
3002 }
3003}
3004
3005bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3006 return ExtVal.getValueType().isScalableVector();
3007}
3008
3009// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
3010static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
3011 EVT VT, EVT MemVT,
3012 SelectionDAG &DAG) {
3013   assert(VT.isVector() && "VT should be a vector type");
3014   assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
3015
3016 SDValue Value = ST->getValue();
3017
3018   // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and
3019   // extracts the word lane which represents the v4i8 subvector. It optimizes
3020   // the store to:
3021 //
3022 // xtn v0.8b, v0.8h
3023 // str s0, [x0]
3024
3025 SDValue Undef = DAG.getUNDEF(MVT::i16);
3026 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
3027 {Undef, Undef, Undef, Undef});
3028
3029 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
3030 Value, UndefVec);
3031 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
3032
3033 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
3034 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3035 Trunc, DAG.getConstant(0, DL, MVT::i64));
3036
3037 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
3038 ST->getBasePtr(), ST->getMemOperand());
3039}
3040
3041// Custom lowering for any store, vector or scalar, whether normal or
3042// truncating. Currently we only custom lower truncating stores from
3043// vector v4i16 to v4i8 and volatile stores of i128.
3044SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
3045 SelectionDAG &DAG) const {
3046 SDLoc Dl(Op);
3047 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
3048   assert(StoreNode && "Can only custom lower store nodes");
3049
3050 SDValue Value = StoreNode->getValue();
3051
3052 EVT VT = Value.getValueType();
3053 EVT MemVT = StoreNode->getMemoryVT();
3054
3055 if (VT.isVector()) {
3056 unsigned AS = StoreNode->getAddressSpace();
3057 unsigned Align = StoreNode->getAlignment();
3058 if (Align < MemVT.getStoreSize() &&
3059 !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
3060 StoreNode->getMemOperand()->getFlags(),
3061 nullptr)) {
3062 return scalarizeVectorStore(StoreNode, DAG);
3063 }
3064
3065 if (StoreNode->isTruncatingStore()) {
3066 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
3067 }
3068 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
3069     assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
3070 SDValue Lo =
3071 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3072 DAG.getConstant(0, Dl, MVT::i64));
3073 SDValue Hi =
3074 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3075 DAG.getConstant(1, Dl, MVT::i64));
3076 SDValue Result = DAG.getMemIntrinsicNode(
3077 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
3078 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
3079 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
3080 return Result;
3081 }
3082
3083 return SDValue();
3084}
3085
3086SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
3087 SelectionDAG &DAG) const {
3088   LLVM_DEBUG(dbgs() << "Custom lowering: ");
3089   LLVM_DEBUG(Op.dump());
3090
3091 switch (Op.getOpcode()) {
3092 default:
3093     llvm_unreachable("unimplemented operand");
3094 return SDValue();
3095 case ISD::BITCAST:
3096 return LowerBITCAST(Op, DAG);
3097 case ISD::GlobalAddress:
3098 return LowerGlobalAddress(Op, DAG);
3099 case ISD::GlobalTLSAddress:
3100 return LowerGlobalTLSAddress(Op, DAG);
3101 case ISD::SETCC:
3102 return LowerSETCC(Op, DAG);
3103 case ISD::BR_CC:
3104 return LowerBR_CC(Op, DAG);
3105 case ISD::SELECT:
3106 return LowerSELECT(Op, DAG);
3107 case ISD::SELECT_CC:
3108 return LowerSELECT_CC(Op, DAG);
3109 case ISD::JumpTable:
3110 return LowerJumpTable(Op, DAG);
3111 case ISD::BR_JT:
3112 return LowerBR_JT(Op, DAG);
3113 case ISD::ConstantPool:
3114 return LowerConstantPool(Op, DAG);
3115 case ISD::BlockAddress:
3116 return LowerBlockAddress(Op, DAG);
3117 case ISD::VASTART:
3118 return LowerVASTART(Op, DAG);
3119 case ISD::VACOPY:
3120 return LowerVACOPY(Op, DAG);
3121 case ISD::VAARG:
3122 return LowerVAARG(Op, DAG);
3123 case ISD::ADDC:
3124 case ISD::ADDE:
3125 case ISD::SUBC:
3126 case ISD::SUBE:
3127 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
3128 case ISD::SADDO:
3129 case ISD::UADDO:
3130 case ISD::SSUBO:
3131 case ISD::USUBO:
3132 case ISD::SMULO:
3133 case ISD::UMULO:
3134 return LowerXALUO(Op, DAG);
3135 case ISD::FADD:
3136 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
3137 case ISD::FSUB:
3138 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
3139 case ISD::FMUL:
3140 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
3141 case ISD::FDIV:
3142 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
3143 case ISD::FP_ROUND:
3144 return LowerFP_ROUND(Op, DAG);
3145 case ISD::FP_EXTEND:
3146 return LowerFP_EXTEND(Op, DAG);
3147 case ISD::FRAMEADDR:
3148 return LowerFRAMEADDR(Op, DAG);
3149 case ISD::SPONENTRY:
3150 return LowerSPONENTRY(Op, DAG);
3151 case ISD::RETURNADDR:
3152 return LowerRETURNADDR(Op, DAG);
3153 case ISD::ADDROFRETURNADDR:
3154 return LowerADDROFRETURNADDR(Op, DAG);
3155 case ISD::INSERT_VECTOR_ELT:
3156 return LowerINSERT_VECTOR_ELT(Op, DAG);
3157 case ISD::EXTRACT_VECTOR_ELT:
3158 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3159 case ISD::BUILD_VECTOR:
3160 return LowerBUILD_VECTOR(Op, DAG);
3161 case ISD::VECTOR_SHUFFLE:
3162 return LowerVECTOR_SHUFFLE(Op, DAG);
3163 case ISD::SPLAT_VECTOR:
3164 return LowerSPLAT_VECTOR(Op, DAG);
3165 case ISD::EXTRACT_SUBVECTOR:
3166 return LowerEXTRACT_SUBVECTOR(Op, DAG);
3167 case ISD::SRA:
3168 case ISD::SRL:
3169 case ISD::SHL:
3170 return LowerVectorSRA_SRL_SHL(Op, DAG);
3171 case ISD::SHL_PARTS:
3172 return LowerShiftLeftParts(Op, DAG);
3173 case ISD::SRL_PARTS:
3174 case ISD::SRA_PARTS:
3175 return LowerShiftRightParts(Op, DAG);
3176 case ISD::CTPOP:
3177 return LowerCTPOP(Op, DAG);
3178 case ISD::FCOPYSIGN:
3179 return LowerFCOPYSIGN(Op, DAG);
3180 case ISD::OR:
3181 return LowerVectorOR(Op, DAG);
3182 case ISD::XOR:
3183 return LowerXOR(Op, DAG);
3184 case ISD::PREFETCH:
3185 return LowerPREFETCH(Op, DAG);
3186 case ISD::SINT_TO_FP:
3187 case ISD::UINT_TO_FP:
3188 return LowerINT_TO_FP(Op, DAG);
3189 case ISD::FP_TO_SINT:
3190 case ISD::FP_TO_UINT:
3191 return LowerFP_TO_INT(Op, DAG);
3192 case ISD::FSINCOS:
3193 return LowerFSINCOS(Op, DAG);
3194 case ISD::FLT_ROUNDS_:
3195 return LowerFLT_ROUNDS_(Op, DAG);
3196 case ISD::MUL:
3197 return LowerMUL(Op, DAG);
3198 case ISD::INTRINSIC_WO_CHAIN:
3199 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3200 case ISD::STORE:
3201 return LowerSTORE(Op, DAG);
3202 case ISD::VECREDUCE_ADD:
3203 case ISD::VECREDUCE_SMAX:
3204 case ISD::VECREDUCE_SMIN:
3205 case ISD::VECREDUCE_UMAX:
3206 case ISD::VECREDUCE_UMIN:
3207 case ISD::VECREDUCE_FMAX:
3208 case ISD::VECREDUCE_FMIN:
3209 return LowerVECREDUCE(Op, DAG);
3210 case ISD::ATOMIC_LOAD_SUB:
3211 return LowerATOMIC_LOAD_SUB(Op, DAG);
3212 case ISD::ATOMIC_LOAD_AND:
3213 return LowerATOMIC_LOAD_AND(Op, DAG);
3214 case ISD::DYNAMIC_STACKALLOC:
3215 return LowerDYNAMIC_STACKALLOC(Op, DAG);
3216 }
3217}
3218
3219//===----------------------------------------------------------------------===//
3220// Calling Convention Implementation
3221//===----------------------------------------------------------------------===//
3222
3223/// Selects the correct CCAssignFn for a given CallingConvention value.
3224CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
3225 bool IsVarArg) const {
3226 switch (CC) {
3227 default:
3228 report_fatal_error("Unsupported calling convention.");
3229 case CallingConv::AArch64_SVE_VectorCall:
3230     // Calling SVE functions is not yet supported.
3231 report_fatal_error("Unsupported calling convention.");
3232 case CallingConv::WebKit_JS:
3233 return CC_AArch64_WebKit_JS;
3234 case CallingConv::GHC:
3235 return CC_AArch64_GHC;
3236 case CallingConv::C:
3237 case CallingConv::Fast:
3238 case CallingConv::PreserveMost:
3239 case CallingConv::CXX_FAST_TLS:
3240 case CallingConv::Swift:
3241 if (Subtarget->isTargetWindows() && IsVarArg)
3242 return CC_AArch64_Win64_VarArg;
3243 if (!Subtarget->isTargetDarwin())
3244 return CC_AArch64_AAPCS;
3245 if (!IsVarArg)
3246 return CC_AArch64_DarwinPCS;
3247 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
3248 : CC_AArch64_DarwinPCS_VarArg;
3249 case CallingConv::Win64:
3250 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
3251 case CallingConv::CFGuard_Check:
3252 return CC_AArch64_Win64_CFGuard_Check;
3253 case CallingConv::AArch64_VectorCall:
3254 return CC_AArch64_AAPCS;
3255 }
3256}
3257
3258CCAssignFn *
3259AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
3260 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3261 : RetCC_AArch64_AAPCS;
3262}
3263
3264SDValue AArch64TargetLowering::LowerFormalArguments(
3265 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3266 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3267 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3268 MachineFunction &MF = DAG.getMachineFunction();
3269 MachineFrameInfo &MFI = MF.getFrameInfo();
3270 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3271
3272 // Assign locations to all of the incoming arguments.
3273 SmallVector<CCValAssign, 16> ArgLocs;
3274 DenseMap<unsigned, SDValue> CopiedRegs;
3275 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3276 *DAG.getContext());
3277
3278 // At this point, Ins[].VT may already be promoted to i32. To correctly
3279 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3280 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3281 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
3282 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
3283 // LocVT.
3284 unsigned NumArgs = Ins.size();
3285 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3286 unsigned CurArgIdx = 0;
3287 for (unsigned i = 0; i != NumArgs; ++i) {
3288 MVT ValVT = Ins[i].VT;
3289 if (Ins[i].isOrigArg()) {
3290 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
3291 CurArgIdx = Ins[i].getOrigArgIndex();
3292
3293 // Get type of the original argument.
3294 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3295 /*AllowUnknown*/ true);
3296 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3297 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3298 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3299 ValVT = MVT::i8;
3300 else if (ActualMVT == MVT::i16)
3301 ValVT = MVT::i16;
3302 }
3303 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3304 bool Res =
3305 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3306     assert(!Res && "Call operand has unhandled type");
3307 (void)Res;
3308 }
3309   assert(ArgLocs.size() == Ins.size());
3310 SmallVector<SDValue, 16> ArgValues;
3311 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3312 CCValAssign &VA = ArgLocs[i];
3313
3314 if (Ins[i].Flags.isByVal()) {
3315 // Byval is used for HFAs in the PCS, but the system should work in a
3316 // non-compliant manner for larger structs.
3317 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3318 int Size = Ins[i].Flags.getByValSize();
3319 unsigned NumRegs = (Size + 7) / 8;
3320
3321       // FIXME: This works on big-endian for composite byvals, which are the
3322       // common case. It should work for fundamental types too.
3323 unsigned FrameIdx =
3324 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3325 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3326 InVals.push_back(FrameIdxN);
3327
3328 continue;
3329 }
3330
3331 SDValue ArgValue;
3332 if (VA.isRegLoc()) {
3333 // Arguments stored in registers.
3334 EVT RegVT = VA.getLocVT();
3335 const TargetRegisterClass *RC;
3336
3337 if (RegVT == MVT::i32)
3338 RC = &AArch64::GPR32RegClass;
3339 else if (RegVT == MVT::i64)
3340 RC = &AArch64::GPR64RegClass;
3341 else if (RegVT == MVT::f16)
3342 RC = &AArch64::FPR16RegClass;
3343 else if (RegVT == MVT::f32)
3344 RC = &AArch64::FPR32RegClass;
3345 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3346 RC = &AArch64::FPR64RegClass;
3347 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3348 RC = &AArch64::FPR128RegClass;
3349 else if (RegVT.isScalableVector() &&
3350 RegVT.getVectorElementType() == MVT::i1)
3351 RC = &AArch64::PPRRegClass;
3352 else if (RegVT.isScalableVector())
3353 RC = &AArch64::ZPRRegClass;
3354 else
3355       llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3356
3357 // Transform the arguments in physical registers into virtual ones.
3358 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3359 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3360
3361 // If this is an 8, 16 or 32-bit value, it is really passed promoted
3362 // to 64 bits. Insert an assert[sz]ext to capture this, then
3363 // truncate to the right size.
3364 switch (VA.getLocInfo()) {
3365 default:
3366       llvm_unreachable("Unknown loc info!");
3367 case CCValAssign::Full:
3368 break;
3369 case CCValAssign::Indirect:
3370       assert(VA.getValVT().isScalableVector() &&
3371              "Only scalable vectors can be passed indirectly");
3372       llvm_unreachable("Spilling of SVE vectors not yet implemented");
3373 case CCValAssign::BCvt:
3374 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3375 break;
3376 case CCValAssign::AExt:
3377 case CCValAssign::SExt:
3378 case CCValAssign::ZExt:
3379 break;
3380 case CCValAssign::AExtUpper:
3381 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
3382 DAG.getConstant(32, DL, RegVT));
3383 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
3384 break;
3385 }
3386 } else { // VA.isRegLoc()
3387       assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3388 unsigned ArgOffset = VA.getLocMemOffset();
3389 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
3390
3391 uint32_t BEAlign = 0;
3392 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3393 !Ins[i].Flags.isInConsecutiveRegs())
3394 BEAlign = 8 - ArgSize;
3395
3396 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3397
3398 // Create load nodes to retrieve arguments from the stack.
3399 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3400
3401       // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
3402 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3403 MVT MemVT = VA.getValVT();
3404
3405 switch (VA.getLocInfo()) {
3406 default:
3407 break;
3408 case CCValAssign::Trunc:
3409 case CCValAssign::BCvt:
3410 MemVT = VA.getLocVT();
3411 break;
3412 case CCValAssign::Indirect:
3413         assert(VA.getValVT().isScalableVector() &&
3414                "Only scalable vectors can be passed indirectly");
3415         llvm_unreachable("Spilling of SVE vectors not yet implemented");
3416 case CCValAssign::SExt:
3417 ExtType = ISD::SEXTLOAD;
3418 break;
3419 case CCValAssign::ZExt:
3420 ExtType = ISD::ZEXTLOAD;
3421 break;
3422 case CCValAssign::AExt:
3423 ExtType = ISD::EXTLOAD;
3424 break;
3425 }
3426
3427 ArgValue = DAG.getExtLoad(
3428 ExtType, DL, VA.getLocVT(), Chain, FIN,
3429 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3430 MemVT);
3431
3432 }
3433 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
3434 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
3435 ArgValue, DAG.getValueType(MVT::i32));
3436 InVals.push_back(ArgValue);
3437 }
3438
3439 // varargs
3440 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3441 if (isVarArg) {
3442 if (!Subtarget->isTargetDarwin() || IsWin64) {
3443 // The AAPCS variadic function ABI is identical to the non-variadic
3444 // one. As a result there may be more arguments in registers and we should
3445 // save them for future reference.
3446 // Win64 variadic functions also pass arguments in registers, but all float
3447 // arguments are passed in integer registers.
3448 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3449 }
3450
3451 // This will point to the next argument passed via stack.
3452 unsigned StackOffset = CCInfo.getNextStackOffset();
3453 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
3454 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
3455 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3456
3457 if (MFI.hasMustTailInVarArgFunc()) {
3458 SmallVector<MVT, 2> RegParmTypes;
3459 RegParmTypes.push_back(MVT::i64);
3460 RegParmTypes.push_back(MVT::f128);
3461 // Compute the set of forwarded registers. The rest are scratch.
3462 SmallVectorImpl<ForwardedRegister> &Forwards =
3463 FuncInfo->getForwardedMustTailRegParms();
3464 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
3465 CC_AArch64_AAPCS);
3466
3467 // Conservatively forward X8, since it might be used for aggregate return.
3468 if (!CCInfo.isAllocated(AArch64::X8)) {
3469 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
3470 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
3471 }
3472 }
3473 }
3474
3475 // On Windows, InReg pointers must be returned, so record the pointer in a
3476 // virtual register at the start of the function so it can be returned in the
3477 // epilogue.
3478 if (IsWin64) {
3479 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3480 if (Ins[I].Flags.isInReg()) {
3481         assert(!FuncInfo->getSRetReturnReg());
3482
3483 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3484 Register Reg =
3485 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3486 FuncInfo->setSRetReturnReg(Reg);
3487
3488 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
3489 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
3490 break;
3491 }
3492 }
3493 }
3494
3495 unsigned StackArgSize = CCInfo.getNextStackOffset();
3496 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3497 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3498 // This is a non-standard ABI so by fiat I say we're allowed to make full
3499 // use of the stack area to be popped, which must be aligned to 16 bytes in
3500 // any case:
3501 StackArgSize = alignTo(StackArgSize, 16);
3502
3503 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3504 // a multiple of 16.
3505 FuncInfo->setArgumentStackToRestore(StackArgSize);
3506
3507 // This realignment carries over to the available bytes below. Our own
3508 // callers will guarantee the space is free by giving an aligned value to
3509 // CALLSEQ_START.
3510 }
3511 // Even if we're not expected to free up the space, it's useful to know how
3512 // much is there while considering tail calls (because we can reuse it).
3513 FuncInfo->setBytesInStackArgArea(StackArgSize);
3514
3515 if (Subtarget->hasCustomCallingConv())
3516 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3517
3518 return Chain;
3519}
3520
3521void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3522 SelectionDAG &DAG,
3523 const SDLoc &DL,
3524 SDValue &Chain) const {
3525 MachineFunction &MF = DAG.getMachineFunction();
3526 MachineFrameInfo &MFI = MF.getFrameInfo();
3527 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3528 auto PtrVT = getPointerTy(DAG.getDataLayout());
3529 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3530
3531 SmallVector<SDValue, 8> MemOps;
3532
3533 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3534 AArch64::X3, AArch64::X4, AArch64::X5,
3535 AArch64::X6, AArch64::X7 };
3536 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3537 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3538
3539 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3540 int GPRIdx = 0;
3541 if (GPRSaveSize != 0) {
3542 if (IsWin64) {
3543 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3544 if (GPRSaveSize & 15)
3545 // The extra size here, if triggered, will always be 8.
3546 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3547 } else
3548 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3549
3550 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3551
3552 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3553 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3554 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3555 SDValue Store = DAG.getStore(
3556 Val.getValue(1), DL, Val, FIN,
3557 IsWin64
3558 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3559 GPRIdx,
3560 (i - FirstVariadicGPR) * 8)
3561 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3562 MemOps.push_back(Store);
3563 FIN =
3564 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3565 }
3566 }
3567 FuncInfo->setVarArgsGPRIndex(GPRIdx);
3568 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3569
3570 if (Subtarget->hasFPARMv8() && !IsWin64) {
3571 static const MCPhysReg FPRArgRegs[] = {
3572 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3573 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3574 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3575 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3576
3577 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3578 int FPRIdx = 0;
3579 if (FPRSaveSize != 0) {
3580 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3581
3582 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3583
3584 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3585 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3586 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3587
3588 SDValue Store = DAG.getStore(
3589 Val.getValue(1), DL, Val, FIN,
3590 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3591 MemOps.push_back(Store);
3592 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3593 DAG.getConstant(16, DL, PtrVT));
3594 }
3595 }
3596 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3597 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3598 }
3599
3600 if (!MemOps.empty()) {
3601 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3602 }
3603}
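// A minimal standalone sketch (not from this file) of the Win64 GPR save-area
// sizing above: an odd count of unallocated X registers leaves GPRSaveSize
// misaligned by 8, so one extra 8-byte fixed object pads the area to a
// 16-byte boundary ("the extra size here, if triggered, will always be 8").
static unsigned win64GPRSaveAreaSize(unsigned FirstVariadicGPR) {
  const unsigned NumGPRArgRegs = 8; // X0..X7
  unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
  unsigned Pad = (GPRSaveSize & 15) ? 16 - (GPRSaveSize & 15) : 0; // 0 or 8
  return GPRSaveSize + Pad;
}
// win64GPRSaveAreaSize(3) == 48 (40 bytes of saves + 8 bytes of padding)
// win64GPRSaveAreaSize(4) == 32 (already 16-byte aligned)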
3604
3605/// LowerCallResult - Lower the result values of a call into the
3606/// appropriate copies out of appropriate physical registers.
3607SDValue AArch64TargetLowering::LowerCallResult(
3608 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3609 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3610 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3611 SDValue ThisVal) const {
3612 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3613 ? RetCC_AArch64_WebKit_JS
3614 : RetCC_AArch64_AAPCS;
3615 // Assign locations to each value returned by this call.
3616 SmallVector<CCValAssign, 16> RVLocs;
3617 DenseMap<unsigned, SDValue> CopiedRegs;
3618 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3619 *DAG.getContext());
3620 CCInfo.AnalyzeCallResult(Ins, RetCC);
3621
3622 // Copy all of the result registers out of their specified physreg.
3623 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3624 CCValAssign VA = RVLocs[i];
3625
3626 // Pass 'this' value directly from the argument to return value, to avoid
3627 // reg unit interference
3628 if (i == 0 && isThisReturn) {
3629       assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3630              "unexpected return calling convention register assignment");
3631 InVals.push_back(ThisVal);
3632 continue;
3633 }
3634
3635 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
3636 // allows one use of a physreg per block.
3637 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
3638 if (!Val) {
3639 Val =
3640 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3641 Chain = Val.getValue(1);
3642 InFlag = Val.getValue(2);
3643 CopiedRegs[VA.getLocReg()] = Val;
3644 }
3645
3646 switch (VA.getLocInfo()) {
3647 default:
3648       llvm_unreachable("Unknown loc info!");
3649 case CCValAssign::Full:
3650 break;
3651 case CCValAssign::BCvt:
3652 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3653 break;
3654 case CCValAssign::AExtUpper:
3655 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
3656 DAG.getConstant(32, DL, VA.getLocVT()));
3657       LLVM_FALLTHROUGH;
3658 case CCValAssign::AExt:
3659       LLVM_FALLTHROUGH;
3660 case CCValAssign::ZExt:
3661 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
3662 break;
3663 }
3664
3665 InVals.push_back(Val);
3666 }
3667
3668 return Chain;
3669}
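
The AExtUpper case above recovers a 32-bit value that was returned packed in the upper half of a 64-bit register (shift right by 32, then truncate), and LowerReturn later builds such packed values with a shift-left and OR. A minimal stand-alone sketch of that round trip, using plain integers instead of SDValues (the helper names are illustrative only, not part of this file):

#include <cassert>
#include <cstdint>

// Pack two i32 halves into one X register, as the AExtUpper return path
// does with ISD::SHL by 32 followed by ISD::OR.
static uint64_t packHalves(uint32_t Lo, uint32_t Hi) {
  return (uint64_t{Hi} << 32) | Lo;
}

// Recover the upper half, as LowerCallResult's AExtUpper path does with
// ISD::SRL by 32 followed by getZExtOrTrunc to the 32-bit value type.
static uint32_t unpackUpperHalf(uint64_t Packed) {
  return static_cast<uint32_t>(Packed >> 32);
}

int main() {
  uint64_t X0 = packHalves(0x12345678u, 0xCAFEBABEu);
  assert(unpackUpperHalf(X0) == 0xCAFEBABEu);
  assert(static_cast<uint32_t>(X0) == 0x12345678u);
  return 0;
}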
3670
3671/// Return true if the calling convention is one that we can guarantee TCO for.
3672static bool canGuaranteeTCO(CallingConv::ID CC) {
3673 return CC == CallingConv::Fast;
3674}
3675
3676/// Return true if we might ever do TCO for calls with this calling convention.
3677static bool mayTailCallThisCC(CallingConv::ID CC) {
3678 switch (CC) {
3679 case CallingConv::C:
3680 case CallingConv::PreserveMost:
3681 case CallingConv::Swift:
3682 return true;
3683 default:
3684 return canGuaranteeTCO(CC);
3685 }
3686}
3687
3688bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3689 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3690 const SmallVectorImpl<ISD::OutputArg> &Outs,
3691 const SmallVectorImpl<SDValue> &OutVals,
3692 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3693 if (!mayTailCallThisCC(CalleeCC))
3694 return false;
3695
3696 MachineFunction &MF = DAG.getMachineFunction();
3697 const Function &CallerF = MF.getFunction();
3698 CallingConv::ID CallerCC = CallerF.getCallingConv();
3699 bool CCMatch = CallerCC == CalleeCC;
3700
3701 // Byval parameters hand the function a pointer directly into the stack area
3702 // we want to reuse during a tail call. Working around this *is* possible (see
3703 // X86) but less efficient and uglier in LowerCall.
3704 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3705 e = CallerF.arg_end();
3706 i != e; ++i) {
3707 if (i->hasByValAttr())
3708 return false;
3709
3710 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
3711 // In this case, it is necessary to save/restore X0 in the callee. Tail
3712 // call opt interferes with this. So we disable tail call opt when the
3713 // caller has an argument with "inreg" attribute.
3714
3715 // FIXME: Check whether the callee also has an "inreg" argument.
3716 if (i->hasInRegAttr())
3717 return false;
3718 }
3719
3720 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3721 return canGuaranteeTCO(CalleeCC) && CCMatch;
3722
3723 // Externally-defined functions with weak linkage should not be
3724 // tail-called on AArch64 when the OS does not support dynamic
3725 // pre-emption of symbols, as the AAELF spec requires normal calls
3726 // to undefined weak functions to be replaced with a NOP or jump to the
3727 // next instruction. The behaviour of branch instructions in this
3728 // situation (as used for tail calls) is implementation-defined, so we
3729 // cannot rely on the linker replacing the tail call with a return.
3730 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3731 const GlobalValue *GV = G->getGlobal();
3732 const Triple &TT = getTargetMachine().getTargetTriple();
3733 if (GV->hasExternalWeakLinkage() &&
3734 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3735 return false;
3736 }
3737
3738 // Now we search for cases where we can use a tail call without changing the
3739 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3740 // concept.
3741
3742 // I want anyone implementing a new calling convention to think long and hard
3743 // about this assert.
3744 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3745        "Unexpected variadic calling convention");
3746
3747 LLVMContext &C = *DAG.getContext();
3748 if (isVarArg && !Outs.empty()) {
3749 // At least two cases here: if caller is fastcc then we can't have any
3750 // memory arguments (we'd be expected to clean up the stack afterwards). If
3751 // caller is C then we could potentially use its argument area.
3752
3753 // FIXME: for now we take the most conservative of these in both cases:
3754 // disallow all variadic memory operands.
3755 SmallVector<CCValAssign, 16> ArgLocs;
3756 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3757
3758 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3759 for (const CCValAssign &ArgLoc : ArgLocs)
3760 if (!ArgLoc.isRegLoc())
3761 return false;
3762 }
3763
3764 // Check that the call results are passed in the same way.
3765 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3766 CCAssignFnForCall(CalleeCC, isVarArg),
3767 CCAssignFnForCall(CallerCC, isVarArg)))
3768 return false;
3769 // The callee has to preserve all registers the caller needs to preserve.
3770 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3771 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3772 if (!CCMatch) {
3773 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3774 if (Subtarget->hasCustomCallingConv()) {
3775 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3776 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3777 }
3778 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3779 return false;
3780 }
3781
3782 // Nothing more to check if the callee is taking no arguments
3783 if (Outs.empty())
3784 return true;
3785
3786 SmallVector<CCValAssign, 16> ArgLocs;
3787 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3788
3789 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3790
3791 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3792
3793 // If the stack arguments for this call do not fit into our own save area then
3794 // the call cannot be made tail.
3795 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3796 return false;
3797
3798 const MachineRegisterInfo &MRI = MF.getRegInfo();
3799 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3800 return false;
3801
3802 return true;
3803}
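
One of the last checks above is purely arithmetic: the callee's stack-based arguments must fit inside the argument area the caller already owns, otherwise the frame cannot be reused and the call is not eligible. A stand-alone sketch of that comparison (the helper name is hypothetical):

#include <cassert>

// Mirrors the check: CCInfo.getNextStackOffset() greater than
// FuncInfo->getBytesInStackArgArea() disqualifies the tail call.
static bool stackArgsFitInCallerArea(unsigned CalleeNextStackOffset,
                                     unsigned CallerBytesInStackArgArea) {
  return CalleeNextStackOffset <= CallerBytesInStackArgArea;
}

int main() {
  assert(stackArgsFitInCallerArea(16, 32));   // callee needs less space: OK
  assert(!stackArgsFitInCallerArea(48, 32));  // callee needs more: no tail call
  return 0;
}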
3804
3805SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3806 SelectionDAG &DAG,
3807 MachineFrameInfo &MFI,
3808 int ClobberedFI) const {
3809 SmallVector<SDValue, 8> ArgChains;
3810 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3811 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3812
3813 // Include the original chain at the beginning of the list. When this is
3814 // used by target LowerCall hooks, this helps legalize find the
3815 // CALLSEQ_BEGIN node.
3816 ArgChains.push_back(Chain);
3817
3818 // Add a chain value for each stack argument that overlaps the clobbered slot.
3819 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3820 UE = DAG.getEntryNode().getNode()->use_end();
3821 U != UE; ++U)
3822 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3823 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3824 if (FI->getIndex() < 0) {
3825 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3826 int64_t InLastByte = InFirstByte;
3827 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3828
3829 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3830 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3831 ArgChains.push_back(SDValue(L, 1));
3832 }
3833
3834 // Build a tokenfactor for all the chains.
3835 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3836}
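
addTokenForArgument chains in only those incoming stack-argument loads whose byte range overlaps the frame slot about to be clobbered. A minimal stand-alone sketch of that interval test (the helper name is made up for illustration):

#include <cassert>
#include <cstdint>

// Two inclusive byte ranges conflict when either one's start lies inside the
// other, which is exactly the condition checked in addTokenForArgument.
static bool frameObjectsOverlap(int64_t FirstByte, int64_t LastByte,
                                int64_t InFirstByte, int64_t InLastByte) {
  return (InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
         (FirstByte <= InFirstByte && InFirstByte <= LastByte);
}

int main() {
  assert(frameObjectsOverlap(0, 7, 4, 11));   // partial overlap
  assert(frameObjectsOverlap(8, 15, 0, 31));  // fully contained
  assert(!frameObjectsOverlap(0, 7, 8, 15));  // adjacent but disjoint
  return 0;
}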
3837
3838bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3839 bool TailCallOpt) const {
3840 return CallCC == CallingConv::Fast && TailCallOpt;
3841}
3842
3843/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3844/// and add input and output parameter nodes.
3845SDValue
3846AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3847 SmallVectorImpl<SDValue> &InVals) const {
3848 SelectionDAG &DAG = CLI.DAG;
3849 SDLoc &DL = CLI.DL;
3850 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3851 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3852 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3853 SDValue Chain = CLI.Chain;
3854 SDValue Callee = CLI.Callee;
3855 bool &IsTailCall = CLI.IsTailCall;
3856 CallingConv::ID CallConv = CLI.CallConv;
3857 bool IsVarArg = CLI.IsVarArg;
3858
3859 MachineFunction &MF = DAG.getMachineFunction();
3860 MachineFunction::CallSiteInfo CSInfo;
3861 bool IsThisReturn = false;
3862
3863 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3864 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3865 bool IsSibCall = false;
3866
3867 if (IsTailCall) {
3868 // Check if it's really possible to do a tail call.
3869 IsTailCall = isEligibleForTailCallOptimization(
3870 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3871 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3872 report_fatal_error("failed to perform tail call elimination on a call "
3873 "site marked musttail");
3874
3875 // A sibling call is one where we're under the usual C ABI and not planning
3876 // to change that but can still do a tail call:
3877 if (!TailCallOpt && IsTailCall)
3878 IsSibCall = true;
3879
3880 if (IsTailCall)
3881 ++NumTailCalls;
3882 }
3883
3884 // Analyze operands of the call, assigning locations to each operand.
3885 SmallVector<CCValAssign, 16> ArgLocs;
3886 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3887 *DAG.getContext());
3888
3889 if (IsVarArg) {
3890 // Handle fixed and variable vector arguments differently.
3891 // Variable vector arguments always go into memory.
3892 unsigned NumArgs = Outs.size();
3893
3894 for (unsigned i = 0; i != NumArgs; ++i) {
3895 MVT ArgVT = Outs[i].VT;
3896 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3897 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3898 /*IsVarArg=*/ !Outs[i].IsFixed);
3899 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3900 assert(!Res && "Call operand has unhandled type");
3901 (void)Res;
3902 }
3903 } else {
3904 // At this point, Outs[].VT may already be promoted to i32. To correctly
3905 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3906 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3907 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3908 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3909 // LocVT.
3910 unsigned NumArgs = Outs.size();
3911 for (unsigned i = 0; i != NumArgs; ++i) {
3912 MVT ValVT = Outs[i].VT;
3913 // Get type of the original argument.
3914 EVT ActualVT = getValueType(DAG.getDataLayout(),
3915 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3916 /*AllowUnknown*/ true);
3917 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3918 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3919 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3920 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3921 ValVT = MVT::i8;
3922 else if (ActualMVT == MVT::i16)
3923 ValVT = MVT::i16;
3924
3925 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3926 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3927 assert(!Res && "Call operand has unhandled type");
3928 (void)Res;
3929 }
3930 }
3931
3932 // Get a count of how many bytes are to be pushed on the stack.
3933 unsigned NumBytes = CCInfo.getNextStackOffset();
3934
3935 if (IsSibCall) {
3936 // Since we're not changing the ABI to make this a tail call, the memory
3937 // operands are already available in the caller's incoming argument space.
3938 NumBytes = 0;
3939 }
3940
3941 // FPDiff is the byte offset of the call's argument area from the callee's.
3942 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3943 // by this amount for a tail call. In a sibling call it must be 0 because the
3944 // caller will deallocate the entire stack and the callee still expects its
3945 // arguments to begin at SP+0. Completely unused for non-tail calls.
3946 int FPDiff = 0;
3947
3948 if (IsTailCall && !IsSibCall) {
3949 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3950
3951 // Since callee will pop argument stack as a tail call, we must keep the
3952 // popped size 16-byte aligned.
3953 NumBytes = alignTo(NumBytes, 16);
3954
3955 // FPDiff will be negative if this tail call requires more space than we
3956 // would automatically have in our incoming argument space. Positive if we
3957 // can actually shrink the stack.
3958 FPDiff = NumReusableBytes - NumBytes;
3959
3960 // The stack pointer must be 16-byte aligned at all times it's used for a
3961 // memory operation, which in practice means at *all* times and in
3962 // particular across call boundaries. Therefore our own arguments started at
3963 // a 16-byte aligned SP and the delta applied for the tail call should
3964 // satisfy the same constraint.
3965 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3966 }
3967
3968 // Adjust the stack pointer for the new arguments...
3969 // These operations are automatically eliminated by the prolog/epilog pass
3970 if (!IsSibCall)
3971 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3972
3973 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3974 getPointerTy(DAG.getDataLayout()));
3975
3976 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3977 SmallSet<unsigned, 8> RegsUsed;
3978 SmallVector<SDValue, 8> MemOpChains;
3979 auto PtrVT = getPointerTy(DAG.getDataLayout());
3980
3981 if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
3982 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
3983 for (const auto &F : Forwards) {
3984 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
3985 RegsToPass.emplace_back(F.PReg, Val);
3986 }
3987 }
3988
3989 // Walk the register/memloc assignments, inserting copies/loads.
3990 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3991 CCValAssign &VA = ArgLocs[i];
3992 SDValue Arg = OutVals[i];
3993 ISD::ArgFlagsTy Flags = Outs[i].Flags;
3994
3995 // Promote the value if needed.
3996 switch (VA.getLocInfo()) {
3997 default:
3998 llvm_unreachable("Unknown loc info!");
3999 case CCValAssign::Full:
4000 break;
4001 case CCValAssign::SExt:
4002 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
4003 break;
4004 case CCValAssign::ZExt:
4005 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4006 break;
4007 case CCValAssign::AExt:
4008 if (Outs[i].ArgVT == MVT::i1) {
4009 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
4010 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4011 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
4012 }
4013 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4014 break;
4015 case CCValAssign::AExtUpper:
4016 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4017 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4018 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4019 DAG.getConstant(32, DL, VA.getLocVT()));
4020 break;
4021 case CCValAssign::BCvt:
4022 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
4023 break;
4024 case CCValAssign::Trunc:
4025 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4026 break;
4027 case CCValAssign::FPExt:
4028 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
4029 break;
4030 case CCValAssign::Indirect:
4031 assert(VA.getValVT().isScalableVector() &&
4032        "Only scalable vectors can be passed indirectly");
4033 llvm_unreachable("Spilling of SVE vectors not yet implemented");
4034 }
4035
4036 if (VA.isRegLoc()) {
4037 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
4038 Outs[0].VT == MVT::i64) {
4039 assert(VA.getLocVT() == MVT::i64 &&
4040        "unexpected calling convention register assignment");
4041 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
4042        "unexpected use of 'returned'");
4043 IsThisReturn = true;
4044 }
4045 if (RegsUsed.count(VA.getLocReg())) {
4046 // If this register has already been used then we're trying to pack
4047 // parts of an [N x i32] into an X-register. The extension type will
4048 // take care of putting the two halves in the right place but we have to
4049 // combine them.
4050 SDValue &Bits =
4051 std::find_if(RegsToPass.begin(), RegsToPass.end(),
4052 [=](const std::pair<unsigned, SDValue> &Elt) {
4053 return Elt.first == VA.getLocReg();
4054 })
4055 ->second;
4056 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4057 // Call site info is used for function's parameter entry value
4058 // tracking. For now we track only simple cases when parameter
4059 // is transferred through whole register.
4060 CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(),
4061 [&VA](MachineFunction::ArgRegPair ArgReg) {
4062 return ArgReg.Reg == VA.getLocReg();
4063 }),
4064 CSInfo.end());
4065 } else {
4066 RegsToPass.emplace_back(VA.getLocReg(), Arg);
4067 RegsUsed.insert(VA.getLocReg());
4068 const TargetOptions &Options = DAG.getTarget().Options;
4069 if (Options.EnableDebugEntryValues)
4070 CSInfo.emplace_back(VA.getLocReg(), i);
4071 }
4072 } else {
4073 assert(VA.isMemLoc());
4074
4075 SDValue DstAddr;
4076 MachinePointerInfo DstInfo;
4077
4078 // FIXME: This works on big-endian for composite byvals, which are the
4079 // common case. It should work for fundamental types too.
4080 uint32_t BEAlign = 0;
4081 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
4082 : VA.getValVT().getSizeInBits();
4083 OpSize = (OpSize + 7) / 8;
4084 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
4085 !Flags.isInConsecutiveRegs()) {
4086 if (OpSize < 8)
4087 BEAlign = 8 - OpSize;
4088 }
4089 unsigned LocMemOffset = VA.getLocMemOffset();
4090 int32_t Offset = LocMemOffset + BEAlign;
4091 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4092 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4093
4094 if (IsTailCall) {
4095 Offset = Offset + FPDiff;
4096 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4097
4098 DstAddr = DAG.getFrameIndex(FI, PtrVT);
4099 DstInfo =
4100 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
4101
4102 // Make sure any stack arguments overlapping with where we're storing
4103 // are loaded before this eventual operation. Otherwise they'll be
4104 // clobbered.
4105 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
4106 } else {
4107 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4108
4109 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4110 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
4111 LocMemOffset);
4112 }
4113
4114 if (Outs[i].Flags.isByVal()) {
4115 SDValue SizeNode =
4116 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
4117 SDValue Cpy = DAG.getMemcpy(
4118 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
4119 /*isVol = */ false, /*AlwaysInline = */ false,
4120 /*isTailCall = */ false,
4121 DstInfo, MachinePointerInfo());
4122
4123 MemOpChains.push_back(Cpy);
4124 } else {
4125 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
4126 // promoted to a legal register type i32, we should truncate Arg back to
4127 // i1/i8/i16.
4128 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
4129 VA.getValVT() == MVT::i16)
4130 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
4131
4132 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
4133 MemOpChains.push_back(Store);
4134 }
4135 }
4136 }
4137
4138 if (!MemOpChains.empty())
4139 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4140
4141 // Build a sequence of copy-to-reg nodes chained together with token chain
4142 // and flag operands which copy the outgoing args into the appropriate regs.
4143 SDValue InFlag;
4144 for (auto &RegToPass : RegsToPass) {
4145 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
4146 RegToPass.second, InFlag);
4147 InFlag = Chain.getValue(1);
4148 }
4149
4150 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
4151 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
4152 // node so that legalize doesn't hack it.
4153 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4154 auto GV = G->getGlobal();
4155 unsigned OpFlags =
4156 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
4157 if (OpFlags & AArch64II::MO_GOT) {
4158 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
4159 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4160 } else {
4161 const GlobalValue *GV = G->getGlobal();
4162 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
4163 }
4164 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4165 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4166 Subtarget->isTargetMachO()) {
4167 const char *Sym = S->getSymbol();
4168 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
4169 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4170 } else {
4171 const char *Sym = S->getSymbol();
4172 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
4173 }
4174 }
4175
4176 // We don't usually want to end the call-sequence here because we would tidy
4177 // the frame up *after* the call, however in the ABI-changing tail-call case
4178 // we've carefully laid out the parameters so that when sp is reset they'll be
4179 // in the correct location.
4180 if (IsTailCall && !IsSibCall) {
4181 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4182 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
4183 InFlag = Chain.getValue(1);
4184 }
4185
4186 std::vector<SDValue> Ops;
4187 Ops.push_back(Chain);
4188 Ops.push_back(Callee);
4189
4190 if (IsTailCall) {
4191 // Each tail call may have to adjust the stack by a different amount, so
4192 // this information must travel along with the operation for eventual
4193 // consumption by emitEpilogue.
4194 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
4195 }
4196
4197 // Add argument registers to the end of the list so that they are known live
4198 // into the call.
4199 for (auto &RegToPass : RegsToPass)
4200 Ops.push_back(DAG.getRegister(RegToPass.first,
4201 RegToPass.second.getValueType()));
4202
4203 // Check callee args/returns for SVE registers and set calling convention
4204 // accordingly.
4205 if (CallConv == CallingConv::C) {
4206 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
4207 return Out.VT.isScalableVector();
4208 });
4209 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
4210 return In.VT.isScalableVector();
4211 });
4212
4213 if (CalleeInSVE || CalleeOutSVE)
4214 CallConv = CallingConv::AArch64_SVE_VectorCall;
4215 }
4216
4217 // Add a register mask operand representing the call-preserved registers.
4218 const uint32_t *Mask;
4219 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4220 if (IsThisReturn) {
4221 // For 'this' returns, use the X0-preserving mask if applicable
4222 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
4223 if (!Mask) {
4224 IsThisReturn = false;
4225 Mask = TRI->getCallPreservedMask(MF, CallConv);
4226 }
4227 } else
4228 Mask = TRI->getCallPreservedMask(MF, CallConv);
4229
4230 if (Subtarget->hasCustomCallingConv())
4231 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
4232
4233 if (TRI->isAnyArgRegReserved(MF))
4234 TRI->emitReservedArgRegCallError(MF);
4235
4236 assert(Mask && "Missing call preserved mask for calling convention");
4237 Ops.push_back(DAG.getRegisterMask(Mask));
4238
4239 if (InFlag.getNode())
4240 Ops.push_back(InFlag);
4241
4242 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4243
4244 // If we're doing a tail call, use a TC_RETURN here rather than an
4245 // actual call instruction.
4246 if (IsTailCall) {
4247 MF.getFrameInfo().setHasTailCall();
4248 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
4249 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4250 return Ret;
4251 }
4252
4253 // Returns a chain and a flag for retval copy to use.
4254 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
4255 InFlag = Chain.getValue(1);
4256 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4257
4258 uint64_t CalleePopBytes =
4259 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
4260
4261 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4262 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
4263 InFlag, DL);
4264 if (!Ins.empty())
4265 InFlag = Chain.getValue(1);
4266
4267 // Handle result values, copying them out of physregs into vregs that we
4268 // return.
4269 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
4270 InVals, IsThisReturn,
4271 IsThisReturn ? OutVals[0] : SDValue());
4272}
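
For a non-sibling tail call, LowerCall rounds the outgoing argument size up to 16 bytes and computes FPDiff as the difference from the caller's incoming argument area; a negative FPDiff means the tail call needs more stack than the caller already has. A stand-alone sketch of that bookkeeping (the function name is illustrative only):

#include <cassert>

static int computeFPDiff(unsigned NumReusableBytes, unsigned NumBytes) {
  unsigned Aligned = (NumBytes + 15u) & ~15u; // same result as alignTo(NumBytes, 16)
  return static_cast<int>(NumReusableBytes) - static_cast<int>(Aligned);
}

int main() {
  assert(computeFPDiff(32, 16) == 16);     // callee needs less: stack can shrink
  assert(computeFPDiff(16, 24) == -16);    // callee needs more: negative FPDiff
  assert(computeFPDiff(16, 24) % 16 == 0); // always 16-byte aligned
  return 0;
}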
4273
4274bool AArch64TargetLowering::CanLowerReturn(
4275 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
4276 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4277 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4278 ? RetCC_AArch64_WebKit_JS
4279 : RetCC_AArch64_AAPCS;
4280 SmallVector<CCValAssign, 16> RVLocs;
4281 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
4282 return CCInfo.CheckReturn(Outs, RetCC);
4283}
4284
4285SDValue
4286AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
4287 bool isVarArg,
4288 const SmallVectorImpl<ISD::OutputArg> &Outs,
4289 const SmallVectorImpl<SDValue> &OutVals,
4290 const SDLoc &DL, SelectionDAG &DAG) const {
4291 auto &MF = DAG.getMachineFunction();
4292 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4293
4294 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4295 ? RetCC_AArch64_WebKit_JS
4296 : RetCC_AArch64_AAPCS;
4297 SmallVector<CCValAssign, 16> RVLocs;
4298 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4299 *DAG.getContext());
4300 CCInfo.AnalyzeReturn(Outs, RetCC);
4301
4302 // Copy the result values into the output registers.
4303 SDValue Flag;
4304 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
4305 SmallSet<unsigned, 4> RegsUsed;
4306 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
4307 ++i, ++realRVLocIdx) {
4308 CCValAssign &VA = RVLocs[i];
4309 assert(VA.isRegLoc() && "Can only return in registers!");
4310 SDValue Arg = OutVals[realRVLocIdx];
4311
4312 switch (VA.getLocInfo()) {
4313 default:
4314 llvm_unreachable("Unknown loc info!");
4315 case CCValAssign::Full:
4316 if (Outs[i].ArgVT == MVT::i1) {
4317 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
4318 // value. This is strictly redundant on Darwin (which uses "zeroext
4319 // i1"), but will be optimised out before ISel.
4320 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4321 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4322 }
4323 break;
4324 case CCValAssign::BCvt:
4325 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
4326 break;
4327 case CCValAssign::AExt:
4328 case CCValAssign::ZExt:
4329 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4330 break;
4331 case CCValAssign::AExtUpper:
4332 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4333 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4334 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4335 DAG.getConstant(32, DL, VA.getLocVT()));
4336 break;
4337 }
4338
4339 if (RegsUsed.count(VA.getLocReg())) {
4340 SDValue &Bits =
4341 std::find_if(RetVals.begin(), RetVals.end(),
4342 [=](const std::pair<unsigned, SDValue> &Elt) {
4343 return Elt.first == VA.getLocReg();
4344 })
4345 ->second;
4346 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4347 } else {
4348 RetVals.emplace_back(VA.getLocReg(), Arg);
4349 RegsUsed.insert(VA.getLocReg());
4350 }
4351 }
4352
4353 SmallVector<SDValue, 4> RetOps(1, Chain);
4354 for (auto &RetVal : RetVals) {
4355 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
4356 Flag = Chain.getValue(1);
4357 RetOps.push_back(
4358 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
4359 }
4360
4361 // Windows AArch64 ABIs require that for returning structs by value we copy
4362 // the sret argument into X0 for the return.
4363 // We saved the argument into a virtual register in the entry block,
4364 // so now we copy the value out and into X0.
4365 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
4366 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
4367 getPointerTy(MF.getDataLayout()));
4368
4369 unsigned RetValReg = AArch64::X0;
4370 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
4371 Flag = Chain.getValue(1);
4372
4373 RetOps.push_back(
4374 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
4375 }
4376
4377 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4378 const MCPhysReg *I =
4379 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
4380 if (I) {
4381 for (; *I; ++I) {
4382 if (AArch64::GPR64RegClass.contains(*I))
4383 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
4384 else if (AArch64::FPR64RegClass.contains(*I))
4385 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
4386 else
4387 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4387)
;
4388 }
4389 }
4390
4391 RetOps[0] = Chain; // Update chain.
4392
4393 // Add the flag if we have it.
4394 if (Flag.getNode())
4395 RetOps.push_back(Flag);
4396
4397 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
4398}
4399
4400//===----------------------------------------------------------------------===//
4401// Other Lowering Code
4402//===----------------------------------------------------------------------===//
4403
4404SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
4405 SelectionDAG &DAG,
4406 unsigned Flag) const {
4407 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
4408 N->getOffset(), Flag);
4409}
4410
4411SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
4412 SelectionDAG &DAG,
4413 unsigned Flag) const {
4414 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
4415}
4416
4417SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
4418 SelectionDAG &DAG,
4419 unsigned Flag) const {
4420 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
4421 N->getOffset(), Flag);
4422}
4423
4424SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
4425 SelectionDAG &DAG,
4426 unsigned Flag) const {
4427 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
4428}
4429
4430// (loadGOT sym)
4431template <class NodeTy>
4432SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
4433 unsigned Flags) const {
4434 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
4435 SDLoc DL(N);
4436 EVT Ty = getPointerTy(DAG.getDataLayout());
4437 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
4438 // FIXME: Once remat is capable of dealing with instructions with register
4439 // operands, expand this into two nodes instead of using a wrapper node.
4440 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
4441}
4442
4443// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
4444template <class NodeTy>
4445SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
4446 unsigned Flags) const {
4447 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4448 SDLoc DL(N);
4449 EVT Ty = getPointerTy(DAG.getDataLayout());
4450 const unsigned char MO_NC = AArch64II::MO_NC;
4451 return DAG.getNode(
4452 AArch64ISD::WrapperLarge, DL, Ty,
4453 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4454 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4455 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4456 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4457}
4458
4459// (addlow (adrp %hi(sym)) %lo(sym))
4460template <class NodeTy>
4461SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4462 unsigned Flags) const {
4463 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4464 SDLoc DL(N);
4465 EVT Ty = getPointerTy(DAG.getDataLayout());
4466 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4467 SDValue Lo = getTargetNode(N, Ty, DAG,
4468 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4469 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4470 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4471}
4472
4473// (adr sym)
4474template <class NodeTy>
4475SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
4476 unsigned Flags) const {
4477 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4478 SDLoc DL(N);
4479 EVT Ty = getPointerTy(DAG.getDataLayout());
4480 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4481 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4482}
4483
4484SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4485 SelectionDAG &DAG) const {
4486 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4487 const GlobalValue *GV = GN->getGlobal();
4488 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4489
4490 if (OpFlags != AArch64II::MO_NO_FLAG)
4491 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4492        "unexpected offset in global node");
4493
4494 // This also catches the large code model case for Darwin, and tiny code
4495 // model with got relocations.
4496 if ((OpFlags & AArch64II::MO_GOT) != 0) {
4497 return getGOT(GN, DAG, OpFlags);
4498 }
4499
4500 SDValue Result;
4501 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4502 Result = getAddrLarge(GN, DAG, OpFlags);
4503 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4504 Result = getAddrTiny(GN, DAG, OpFlags);
4505 } else {
4506 Result = getAddr(GN, DAG, OpFlags);
4507 }
4508 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4509 SDLoc DL(GN);
4510 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
4511 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4512 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4513 return Result;
4514}
4515
4516/// Convert a TLS address reference into the correct sequence of loads
4517/// and calls to compute the variable's address (for Darwin, currently) and
4518/// return an SDValue containing the final node.
4519
4520/// Darwin only has one TLS scheme which must be capable of dealing with the
4521/// fully general situation, in the worst case. This means:
4522/// + "extern __thread" declaration.
4523/// + Defined in a possibly unknown dynamic library.
4524///
4525/// The general system is that each __thread variable has a [3 x i64] descriptor
4526/// which contains information used by the runtime to calculate the address. The
4527/// only part of this the compiler needs to know about is the first xword, which
4528/// contains a function pointer that must be called with the address of the
4529/// entire descriptor in "x0".
4530///
4531/// Since this descriptor may be in a different unit, in general even the
4532/// descriptor must be accessed via an indirect load. The "ideal" code sequence
4533/// is:
4534/// adrp x0, _var@TLVPPAGE
4535/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
4536/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
4537/// ; the function pointer
4538/// blr x1 ; Uses descriptor address in x0
4539/// ; Address of _var is now in x0.
4540///
4541/// If the address of _var's descriptor *is* known to the linker, then it can
4542/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4543/// a slight efficiency gain.
4544SDValue
4545AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4546 SelectionDAG &DAG) const {
4547 assert(Subtarget->isTargetDarwin() &&
4548        "This function expects a Darwin target");
4549
4550 SDLoc DL(Op);
4551 MVT PtrVT = getPointerTy(DAG.getDataLayout());
4552 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
4553 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4554
4555 SDValue TLVPAddr =
4556 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4557 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4558
4559 // The first entry in the descriptor is a function pointer that we must call
4560 // to obtain the address of the variable.
4561 SDValue Chain = DAG.getEntryNode();
4562 SDValue FuncTLVGet = DAG.getLoad(
4563 PtrMemVT, DL, Chain, DescAddr,
4564 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4565 /* Alignment = */ PtrMemVT.getSizeInBits() / 8,
4566 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
4567 Chain = FuncTLVGet.getValue(1);
4568
4569 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
4570 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
4571
4572 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4573 MFI.setAdjustsStack(true);
4574
4575 // TLS calls preserve all registers except those that absolutely must be
4576 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4577 // silly).
4578 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4579 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4580 if (Subtarget->hasCustomCallingConv())
4581 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4582
4583 // Finally, we can make the call. This is just a degenerate version of a
4584 // normal AArch64 call node: x0 takes the address of the descriptor, and
4585 // returns the address of the variable in this thread.
4586 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4587 Chain =
4588 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4589 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4590 DAG.getRegisterMask(Mask), Chain.getValue(1));
4591 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4592}
4593
4594/// When accessing thread-local variables under either the general-dynamic or
4595/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4596/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4597/// is a function pointer to carry out the resolution.
4598///
4599/// The sequence is:
4600/// adrp x0, :tlsdesc:var
4601/// ldr x1, [x0, #:tlsdesc_lo12:var]
4602/// add x0, x0, #:tlsdesc_lo12:var
4603/// .tlsdesccall var
4604/// blr x1
4605/// (TPIDR_EL0 offset now in x0)
4606///
4607/// The above sequence must be produced unscheduled, to enable the linker to
4608/// optimize/relax it.
4609/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4610/// sequence; it is expanded very late in the compilation flow to ensure the
4611/// code is emitted exactly as shown above.
4612SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4613 const SDLoc &DL,
4614 SelectionDAG &DAG) const {
4615 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4616
4617 SDValue Chain = DAG.getEntryNode();
4618 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4619
4620 Chain =
4621 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4622 SDValue Glue = Chain.getValue(1);
4623
4624 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4625}
4626
4627SDValue
4628AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4629 SelectionDAG &DAG) const {
4630 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4631 if (getTargetMachine().getCodeModel() == CodeModel::Large)
4632 report_fatal_error("ELF TLS only supported in small memory model");
4633 // Different choices can be made for the maximum size of the TLS area for a
4634 // module. For the small address model, the default TLS size is 16MiB and the
4635 // maximum TLS size is 4GiB.
4636 // FIXME: add -mtls-size command line option and make it control the 16MiB
4637 // vs. 4GiB code sequence generation.
4638 // FIXME: add tiny codemodel support. We currently generate the same code as
4639 // small, which may be larger than needed.
4640 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4641
4642 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4643
4644 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4645 if (Model == TLSModel::LocalDynamic)
4646 Model = TLSModel::GeneralDynamic;
4647 }
4648
4649 SDValue TPOff;
4650 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4651 SDLoc DL(Op);
4652 const GlobalValue *GV = GA->getGlobal();
4653
4654 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4655
4656 if (Model == TLSModel::LocalExec) {
4657 SDValue HiVar = DAG.getTargetGlobalAddress(
4658 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4659 SDValue LoVar = DAG.getTargetGlobalAddress(
4660 GV, DL, PtrVT, 0,
4661 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4662
4663 SDValue TPWithOff_lo =
4664 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4665 HiVar,
4666 DAG.getTargetConstant(0, DL, MVT::i32)),
4667 0);
4668 SDValue TPWithOff =
4669 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
4670 LoVar,
4671 DAG.getTargetConstant(0, DL, MVT::i32)),
4672 0);
4673 return TPWithOff;
4674 } else if (Model == TLSModel::InitialExec) {
4675 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4676 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4677 } else if (Model == TLSModel::LocalDynamic) {
4678 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
4679 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
4680 // the beginning of the module's TLS region, followed by a DTPREL offset
4681 // calculation.
4682
4683 // These accesses will need deduplicating if there's more than one.
4684 AArch64FunctionInfo *MFI =
4685 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4686 MFI->incNumLocalDynamicTLSAccesses();
4687
4688 // The call needs a relocation too for linker relaxation. It doesn't make
4689 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4690 // the address.
4691 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4692 AArch64II::MO_TLS);
4693
4694 // Now we can calculate the offset from TPIDR_EL0 to this module's
4695 // thread-local area.
4696 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4697
4698 // Now use :dtprel_whatever: operations to calculate this variable's offset
4699 // in its thread-storage area.
4700 SDValue HiVar = DAG.getTargetGlobalAddress(
4701 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4702 SDValue LoVar = DAG.getTargetGlobalAddress(
4703 GV, DL, MVT::i64, 0,
4704 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4705
4706 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4707 DAG.getTargetConstant(0, DL, MVT::i32)),
4708 0);
4709 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4710 DAG.getTargetConstant(0, DL, MVT::i32)),
4711 0);
4712 } else if (Model == TLSModel::GeneralDynamic) {
4713 // The call needs a relocation too for linker relaxation. It doesn't make
4714 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4715 // the address.
4716 SDValue SymAddr =
4717 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4718
4719 // Finally we can make a call to calculate the offset from tpidr_el0.
4720 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4721 } else
4722 llvm_unreachable("Unsupported ELF TLS access model");
4723
4724 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4725}
4726
4727SDValue
4728AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
4729 SelectionDAG &DAG) const {
4730 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
4731
4732 SDValue Chain = DAG.getEntryNode();
4733 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4734 SDLoc DL(Op);
4735
4736 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
4737
4738 // Load the ThreadLocalStoragePointer from the TEB
4739 // A pointer to the TLS array is located at offset 0x58 from the TEB.
4740 SDValue TLSArray =
4741 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
4742 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
4743 Chain = TLSArray.getValue(1);
4744
4745 // Load the TLS index from the C runtime;
4746 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4747 // This also does the same as LOADgot, but using a generic i32 load,
4748 // while LOADgot only loads i64.
4749 SDValue TLSIndexHi =
4750 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4751 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4752 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4753 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4754 SDValue TLSIndex =
4755 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4756 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4757 Chain = TLSIndex.getValue(1);
4758
4759 // The pointer to the thread's TLS data area is at offset (TLS index * 8)
4760 // into the TLSArray.
4761 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4762 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4763 DAG.getConstant(3, DL, PtrVT));
4764 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4765 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4766 MachinePointerInfo());
4767 Chain = TLS.getValue(1);
4768
4769 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4770 const GlobalValue *GV = GA->getGlobal();
4771 SDValue TGAHi = DAG.getTargetGlobalAddress(
4772 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4773 SDValue TGALo = DAG.getTargetGlobalAddress(
4774 GV, DL, PtrVT, 0,
4775 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4776
4777 // Add the offset from the start of the .tls section (section base).
4778 SDValue Addr =
4779 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4780 DAG.getTargetConstant(0, DL, MVT::i32)),
4781 0);
4782 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4783 return Addr;
4784}
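
The Windows lowering above walks TEB -> TLS array -> per-module TLS block -> variable. A stand-alone model of that pointer chasing, with a fabricated TEB and values chosen only for illustration (none of these names come from the source above):

#include <cassert>
#include <cstdint>
#include <cstring>

// TLSArray = *(TEB + 0x58); Block = TLSArray[_tls_index]; Var = Block + Off.
static uint64_t windowsTLSAddress(const unsigned char *TEB, uint32_t TlsIndex,
                                  uint64_t VarOffset) {
  const uint64_t *TLSArray;
  std::memcpy(&TLSArray, TEB + 0x58, sizeof(TLSArray)); // pointer at TEB+0x58
  return TLSArray[TlsIndex] + VarOffset;                // slot scaled by 8
}

int main() {
  uint64_t Slots[2] = {0, 0x7000};   // fake per-module TLS block addresses
  unsigned char TEB[0x60] = {};      // fake TEB, just big enough for offset 0x58
  const uint64_t *SlotsPtr = Slots;
  std::memcpy(TEB + 0x58, &SlotsPtr, sizeof(SlotsPtr));
  assert(windowsTLSAddress(TEB, 1, 0x10) == 0x7010);
  return 0;
}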
4785
4786SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
4787 SelectionDAG &DAG) const {
4788 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4789 if (DAG.getTarget().useEmulatedTLS())
4790 return LowerToTLSEmulatedModel(GA, DAG);
4791
4792 if (Subtarget->isTargetDarwin())
4793 return LowerDarwinGlobalTLSAddress(Op, DAG);
4794 if (Subtarget->isTargetELF())
4795 return LowerELFGlobalTLSAddress(Op, DAG);
4796 if (Subtarget->isTargetWindows())
4797 return LowerWindowsGlobalTLSAddress(Op, DAG);
4798
4799 llvm_unreachable("Unexpected platform trying to use TLS");
4800}
4801
4802SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4803 SDValue Chain = Op.getOperand(0);
4804 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4805 SDValue LHS = Op.getOperand(2);
4806 SDValue RHS = Op.getOperand(3);
4807 SDValue Dest = Op.getOperand(4);
4808 SDLoc dl(Op);
4809
4810 MachineFunction &MF = DAG.getMachineFunction();
4811 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
4812 // will not be produced, as they are conditional branch instructions that do
4813 // not set flags.
4814 bool ProduceNonFlagSettingCondBr =
4815 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
4816
4817 // Handle f128 first, since lowering it will result in comparing the return
4818 // value of a libcall against zero, which is just what the rest of LowerBR_CC
4819 // is expecting to deal with.
4820 if (LHS.getValueType() == MVT::f128) {
4821 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
4822
4823 // If softenSetCCOperands returned a scalar, we need to compare the result
4824 // against zero to select between true and false values.
4825 if (!RHS.getNode()) {
4826 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4827 CC = ISD::SETNE;
4828 }
4829 }
4830
4831 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4832 // instruction.
4833 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4834 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4835 // Only lower legal XALUO ops.
4836 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4837 return SDValue();
4838
4839 // The actual operation with overflow check.
4840 AArch64CC::CondCode OFCC;
4841 SDValue Value, Overflow;
4842 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4843
4844 if (CC == ISD::SETNE)
4845 OFCC = getInvertedCondCode(OFCC);
4846 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4847
4848 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4849 Overflow);
4850 }
4851
4852 if (LHS.getValueType().isInteger()) {
4853   assert((LHS.getValueType() == RHS.getValueType()) &&
4854          (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4855
4856 // If the RHS of the comparison is zero, we can potentially fold this
4857 // to a specialized branch.
4858 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4859 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
4860 if (CC == ISD::SETEQ) {
4861 // See if we can use a TBZ to fold in an AND as well.
4862 // TBZ has a smaller branch displacement than CBZ. If the offset is
4863 // out of bounds, a late MI-layer pass rewrites branches.
4864 // 403.gcc is an example that hits this case.
4865 if (LHS.getOpcode() == ISD::AND &&
4866 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4867 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4868 SDValue Test = LHS.getOperand(0);
4869 uint64_t Mask = LHS.getConstantOperandVal(1);
4870 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4871 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4872 Dest);
4873 }
4874
4875 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4876 } else if (CC == ISD::SETNE) {
4877 // See if we can use a TBZ to fold in an AND as well.
4878 // TBZ has a smaller branch displacement than CBZ. If the offset is
4879 // out of bounds, a late MI-layer pass rewrites branches.
4880 // 403.gcc is an example that hits this case.
4881 if (LHS.getOpcode() == ISD::AND &&
4882 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4883 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4884 SDValue Test = LHS.getOperand(0);
4885 uint64_t Mask = LHS.getConstantOperandVal(1);
4886 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4887 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4888 Dest);
4889 }
4890
4891 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4892 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4893 // Don't combine AND since emitComparison converts the AND to an ANDS
4894 // (a.k.a. TST) and the test in the test bit and branch instruction
4895 // becomes redundant. This would also increase register pressure.
4896 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4897 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4898 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4899 }
4900 }
4901 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4902 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
4903 // Don't combine AND since emitComparison converts the AND to an ANDS
4904 // (a.k.a. TST) and the test in the test bit and branch instruction
4905 // becomes redundant. This would also increase register pressure.
4906 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4907 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4908 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4909 }
4910
4911 SDValue CCVal;
4912 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4913 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4914 Cmp);
4915 }
4916
4917   assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4918          LHS.getValueType() == MVT::f64);
4919
4920 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4921 // clean. Some of them require two branches to implement.
4922 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4923 AArch64CC::CondCode CC1, CC2;
4924 changeFPCCToAArch64CC(CC, CC1, CC2);
4925 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4926 SDValue BR1 =
4927 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4928 if (CC2 != AArch64CC::AL) {
4929 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4930 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4931 Cmp);
4932 }
4933
4934 return BR1;
4935}
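The TBZ/TBNZ fold in LowerBR_CC above fires when the branch condition is an AND with a single power-of-two mask compared against zero; the bit number given to TBZ/TBNZ is the log2 of that mask. A minimal sketch of that decision, using standalone helpers in place of the SelectionDAG queries (the function name is illustrative):

#include <cstdint>

// Returns the bit index to test with TBZ/TBNZ when (Value & Mask) ==/!= 0 can be
// lowered to a test-bit branch, i.e. when Mask has exactly one bit set.
// Returns -1 when the fold does not apply.
int tbzBitForMask(uint64_t Mask) {
  bool IsPowerOf2 = Mask != 0 && (Mask & (Mask - 1)) == 0; // mirrors isPowerOf2_64
  if (!IsPowerOf2)
    return -1;
  int Bit = 0;
  while ((Mask >> Bit) != 1) // mirrors Log2_64 for a single-bit mask
    ++Bit;
  return Bit;                // TBZ/TBNZ tests this bit and branches on it being 0/1
}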
4936
4937SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4938 SelectionDAG &DAG) const {
4939 EVT VT = Op.getValueType();
4940 SDLoc DL(Op);
4941
4942 SDValue In1 = Op.getOperand(0);
4943 SDValue In2 = Op.getOperand(1);
4944 EVT SrcVT = In2.getValueType();
4945
4946 if (SrcVT.bitsLT(VT))
4947 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4948 else if (SrcVT.bitsGT(VT))
4949 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4950
4951 EVT VecVT;
4952 uint64_t EltMask;
4953 SDValue VecVal1, VecVal2;
4954
4955 auto setVecVal = [&] (int Idx) {
4956 if (!VT.isVector()) {
4957 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4958 DAG.getUNDEF(VecVT), In1);
4959 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4960 DAG.getUNDEF(VecVT), In2);
4961 } else {
4962 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4963 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4964 }
4965 };
4966
4967 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4968 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4969 EltMask = 0x80000000ULL;
4970 setVecVal(AArch64::ssub);
4971 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4972 VecVT = MVT::v2i64;
4973
4974 // We want to materialize a mask with the high bit set, but the AdvSIMD
4975 // immediate moves cannot materialize that in a single instruction for
4976 // 64-bit elements. Instead, materialize zero and then negate it.
4977 EltMask = 0;
4978
4979 setVecVal(AArch64::dsub);
4980 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4981 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4982 EltMask = 0x8000ULL;
4983 setVecVal(AArch64::hsub);
4984 } else {
4985     llvm_unreachable("Invalid type for copysign!");
4986 }
4987
4988 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4989
4990 // If we couldn't materialize the mask above, then the mask vector will be
4991 // the zero vector, and we need to negate it here.
4992 if (VT == MVT::f64 || VT == MVT::v2f64) {
4993 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4994 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4995 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4996 }
4997
4998 SDValue Sel =
4999 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
5000
5001 if (VT == MVT::f16)
5002 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
5003 if (VT == MVT::f32)
5004 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
5005 else if (VT == MVT::f64)
5006 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
5007 else
5008 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
5009}
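The AArch64ISD::BIT node used in LowerFCOPYSIGN above effectively performs a bitwise select: where a mask bit is set the result takes the bit from the second operand (the sign source In2), elsewhere from the first (In1). A scalar sketch of the same trick for f32, with 0x80000000 as the sign mask, mirroring the EltMask chosen for the f32/v2f32/v4f32 case (the helper name is illustrative):

#include <cstdint>
#include <cstring>

// Scalar model of the vector BIT-based copysign: keep the magnitude bits of Mag
// and insert the sign bit of Sign, which is exactly what the 0x80000000 mask selects.
float copysignViaBitSelect(float Mag, float Sign) {
  uint32_t MagBits, SignBits;
  std::memcpy(&MagBits, &Mag, sizeof(float));
  std::memcpy(&SignBits, &Sign, sizeof(float));
  const uint32_t SignMask = 0x80000000u;          // EltMask for the f32 case
  uint32_t Result = (MagBits & ~SignMask) | (SignBits & SignMask);
  float Out;
  std::memcpy(&Out, &Result, sizeof(float));
  return Out;
}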
5010
5011SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
5012 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
5013 Attribute::NoImplicitFloat))
5014 return SDValue();
5015
5016 if (!Subtarget->hasNEON())
5017 return SDValue();
5018
5019 // While there is no integer popcount instruction, it can
5020 // be more efficiently lowered to the following sequence that uses
5021 // AdvSIMD registers/instructions as long as the copies to/from
5022 // the AdvSIMD registers are cheap.
5023 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
5024 // CNT V0.8B, V0.8B // 8xbyte pop-counts
5025 // ADDV B0, V0.8B // sum 8xbyte pop-counts
5026 // UMOV X0, V0.B[0] // copy byte result back to integer reg
5027 SDValue Val = Op.getOperand(0);
5028 SDLoc DL(Op);
5029 EVT VT = Op.getValueType();
5030
5031 if (VT == MVT::i32 || VT == MVT::i64) {
5032 if (VT == MVT::i32)
5033 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
5034 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
5035
5036 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
5037 SDValue UaddLV = DAG.getNode(
5038 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
5039 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
5040
5041 if (VT == MVT::i64)
5042 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
5043 return UaddLV;
5044 }
5045
5046   assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
5047           VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
5048          "Unexpected type for custom ctpop lowering");
5049
5050 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5051 Val = DAG.getBitcast(VT8Bit, Val);
5052 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
5053
5054 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
5055 unsigned EltSize = 8;
5056 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
5057 while (EltSize != VT.getScalarSizeInBits()) {
5058 EltSize *= 2;
5059 NumElts /= 2;
5060 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
5061 Val = DAG.getNode(
5062 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
5063 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
5064 }
5065
5066 return Val;
5067}
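The widening loop in LowerCTPOP above repeatedly applies aarch64_neon_uaddlp (unsigned add long pairwise), which sums adjacent lanes into lanes of twice the width until the element size matches the result type. A small scalar model of that accumulation for a v4i32 popcount built from a v16i8 input (the function name and fixed array sizes are illustrative):

#include <array>
#include <bitset>
#include <cstdint>

// Model of v16i8 CTPOP -> uaddlp -> uaddlp producing a v4i32 popcount:
// count the bits of each byte (CNT), then pairwise-add 8->16 and 16->32 bit lanes.
std::array<uint32_t, 4> popcountPerU32(const std::array<uint8_t, 16> &Bytes) {
  std::array<uint16_t, 8> Cnt16;
  for (int i = 0; i < 8; ++i)               // first UADDLP: v16i8 -> v8i16
    Cnt16[i] = std::bitset<8>(Bytes[2 * i]).count() +
               std::bitset<8>(Bytes[2 * i + 1]).count();
  std::array<uint32_t, 4> Cnt32;
  for (int i = 0; i < 4; ++i)               // second UADDLP: v8i16 -> v4i32
    Cnt32[i] = Cnt16[2 * i] + Cnt16[2 * i + 1];
  return Cnt32;
}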
5068
5069SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
5070
5071 if (Op.getValueType().isVector())
5072 return LowerVSETCC(Op, DAG);
5073
5074 SDValue LHS = Op.getOperand(0);
5075 SDValue RHS = Op.getOperand(1);
5076 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
5077 SDLoc dl(Op);
5078
5079 // We chose ZeroOrOneBooleanContents, so use zero and one.
5080 EVT VT = Op.getValueType();
5081 SDValue TVal = DAG.getConstant(1, dl, VT);
5082 SDValue FVal = DAG.getConstant(0, dl, VT);
5083
5084 // Handle f128 first, since one possible outcome is a normal integer
5085 // comparison which gets picked up by the next if statement.
5086 if (LHS.getValueType() == MVT::f128) {
5087 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5088
5089 // If softenSetCCOperands returned a scalar, use it.
5090 if (!RHS.getNode()) {
5091       assert(LHS.getValueType() == Op.getValueType() &&
5092              "Unexpected setcc expansion!");
5093 return LHS;
5094 }
5095 }
5096
5097 if (LHS.getValueType().isInteger()) {
5098 SDValue CCVal;
5099 SDValue Cmp = getAArch64Cmp(
5100 LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
5101
5102 // Note that we inverted the condition above, so we reverse the order of
5103 // the true and false operands here. This will allow the setcc to be
5104 // matched to a single CSINC instruction.
5105 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
5106 }
5107
5108 // Now we know we're dealing with FP values.
5109   assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5110          LHS.getValueType() == MVT::f64);
5111
5112 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
5113 // and do the comparison.
5114 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5115
5116 AArch64CC::CondCode CC1, CC2;
5117 changeFPCCToAArch64CC(CC, CC1, CC2);
5118 if (CC2 == AArch64CC::AL) {
5119 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
5120 CC2);
5121 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5122
5123 // Note that we inverted the condition above, so we reverse the order of
5124 // the true and false operands here. This will allow the setcc to be
5125 // matched to a single CSINC instruction.
5126 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
5127 } else {
5128 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
5129 // totally clean. Some of them require two CSELs to implement. As is in
5130 // this case, we emit the first CSEL and then emit a second using the output
5131 // of the first as the RHS. We're effectively OR'ing the two CC's together.
5132
5133 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
5134 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5135 SDValue CS1 =
5136 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5137
5138 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5139 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5140 }
5141}
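The comments in LowerSETCC above note that the condition is inverted and the 0/1 operands reversed so the node matches a single CSINC. The identity behind this is that cc ? 1 : 0 equals inv(cc) ? 0 : (0 + 1), which is the CSINC Wd, WZR, WZR, inv(cc) form. A tiny model of that identity, with illustrative helper names and CondHolds standing in for the evaluated AArch64 condition:

#include <cassert>
#include <cstdint>

// Model of CSINC Wd, Wn, Wm, cond: returns Wn if cond holds, otherwise Wm + 1.
uint32_t csinc(bool CondHolds, uint32_t Wn, uint32_t Wm) {
  return CondHolds ? Wn : Wm + 1;
}

// setcc(cc) == csinc(!cc, 0 /*WZR*/, 0 /*WZR*/): yields 1 when cc holds, 0 otherwise.
void checkSetccAsCsinc(bool CcHolds) {
  uint32_t SetccResult = CcHolds ? 1u : 0u;
  assert(SetccResult == csinc(!CcHolds, 0, 0));
}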
5142
5143SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
5144 SDValue RHS, SDValue TVal,
5145 SDValue FVal, const SDLoc &dl,
5146 SelectionDAG &DAG) const {
5147 // Handle f128 first, because it will result in a comparison of some RTLIB
5148 // call result against zero.
5149 if (LHS.getValueType() == MVT::f128) {
1: Taking true branch
5150 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5151
5152 // If softenSetCCOperands returned a scalar, we need to compare the result
5153 // against zero to select between true and false values.
5154 if (!RHS.getNode()) {
2: Assuming the condition is false
3: Taking false branch
5155 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5156 CC = ISD::SETNE;
5157 }
5158 }
5159
5160 // Also handle f16, for which we need to do a f32 comparison.
5161 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4: Taking false branch
5162 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
5163 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
5164 }
5165
5166 // Next, handle integers.
5167 if (LHS.getValueType().isInteger()) {
5: Taking true branch
5168     assert((LHS.getValueType() == RHS.getValueType()) &&
5169            (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
5170
5171 unsigned Opcode = AArch64ISD::CSEL;
5172
5173     // If both the TVal and the FVal are constants, see if we can swap them in
5174     // order to form a CSINV or CSINC from them.
5175 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
5176 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
6: Calling 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
21: Returning from 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
5177
5178 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
22: Assuming 'CTVal' is null
5179 std::swap(TVal, FVal);
5180 std::swap(CTVal, CFVal);
5181 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5182     } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
22.1: 'CTVal' is null
5183 std::swap(TVal, FVal);
5184 std::swap(CTVal, CFVal);
5185 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5186 } else if (TVal.getOpcode() == ISD::XOR) {
23: Calling 'SDValue::getOpcode'
5187 // If TVal is a NOT we want to swap TVal and FVal so that we can match
5188 // with a CSINV rather than a CSEL.
5189 if (isAllOnesConstant(TVal.getOperand(1))) {
5190 std::swap(TVal, FVal);
5191 std::swap(CTVal, CFVal);
5192 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5193 }
5194 } else if (TVal.getOpcode() == ISD::SUB) {
5195 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
5196 // that we can match with a CSNEG rather than a CSEL.
5197 if (isNullConstant(TVal.getOperand(0))) {
5198 std::swap(TVal, FVal);
5199 std::swap(CTVal, CFVal);
5200 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5201 }
5202 } else if (CTVal && CFVal) {
5203 const int64_t TrueVal = CTVal->getSExtValue();
5204 const int64_t FalseVal = CFVal->getSExtValue();
5205 bool Swap = false;
5206
5207 // If both TVal and FVal are constants, see if FVal is the
5208 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
5209 // instead of a CSEL in that case.
5210 if (TrueVal == ~FalseVal) {
5211 Opcode = AArch64ISD::CSINV;
5212 } else if (TrueVal == -FalseVal) {
5213 Opcode = AArch64ISD::CSNEG;
5214 } else if (TVal.getValueType() == MVT::i32) {
5215 // If our operands are only 32-bit wide, make sure we use 32-bit
5216 // arithmetic for the check whether we can use CSINC. This ensures that
5217 // the addition in the check will wrap around properly in case there is
5218 // an overflow (which would not be the case if we do the check with
5219 // 64-bit arithmetic).
5220 const uint32_t TrueVal32 = CTVal->getZExtValue();
5221 const uint32_t FalseVal32 = CFVal->getZExtValue();
5222
5223 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
5224 Opcode = AArch64ISD::CSINC;
5225
5226 if (TrueVal32 > FalseVal32) {
5227 Swap = true;
5228 }
5229 }
5230 // 64-bit check whether we can use CSINC.
5231 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
5232 Opcode = AArch64ISD::CSINC;
5233
5234 if (TrueVal > FalseVal) {
5235 Swap = true;
5236 }
5237 }
5238
5239 // Swap TVal and FVal if necessary.
5240 if (Swap) {
5241 std::swap(TVal, FVal);
5242 std::swap(CTVal, CFVal);
5243 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5244 }
5245
5246 if (Opcode != AArch64ISD::CSEL) {
5247 // Drop FVal since we can get its value by simply inverting/negating
5248 // TVal.
5249 FVal = TVal;
5250 }
5251 }
5252
5253 // Avoid materializing a constant when possible by reusing a known value in
5254 // a register. However, don't perform this optimization if the known value
5255 // is one, zero or negative one in the case of a CSEL. We can always
5256 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
5257 // FVal, respectively.
5258 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
5259 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
5260 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
5261 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5262 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
5263 // "a != C ? x : a" to avoid materializing C.
5264 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
5265 TVal = LHS;
5266 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
5267 FVal = LHS;
5268 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
5269       assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
5270 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
5271 // avoid materializing C.
5272 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5273 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
5274 Opcode = AArch64ISD::CSINV;
5275 TVal = LHS;
5276 FVal = DAG.getConstant(0, dl, FVal.getValueType());
5277 }
5278 }
5279
5280 SDValue CCVal;
5281 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
5282 EVT VT = TVal.getValueType();
5283 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
5284 }
5285
5286 // Now we know we're dealing with FP values.
5287   assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5288          LHS.getValueType() == MVT::f64);
5289   assert(LHS.getValueType() == RHS.getValueType());
5290 EVT VT = TVal.getValueType();
5291 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5292
5293 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
5294 // clean. Some of them require two CSELs to implement.
5295 AArch64CC::CondCode CC1, CC2;
5296 changeFPCCToAArch64CC(CC, CC1, CC2);
5297
5298 if (DAG.getTarget().Options.UnsafeFPMath) {
5299 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
5300 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
5301 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
5302 if (RHSVal && RHSVal->isZero()) {
5303 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
5304 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
5305
5306 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
5307 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
5308 TVal = LHS;
5309 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
5310 CFVal && CFVal->isZero() &&
5311 FVal.getValueType() == LHS.getValueType())
5312 FVal = LHS;
5313 }
5314 }
5315
5316 // Emit first, and possibly only, CSEL.
5317 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5318 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5319
5320 // If we need a second CSEL, emit it, using the output of the first as the
5321 // RHS. We're effectively OR'ing the two CC's together.
5322 if (CC2 != AArch64CC::AL) {
5323 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5324 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5325 }
5326
5327 // Otherwise, return the output of the first CSEL.
5328 return CS1;
5329}
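The 32-bit CSINC check inside LowerSELECT_CC above matters exactly when the "+1" wraps. For example, selecting between INT32_MAX and INT32_MIN is a valid CSINC on a 32-bit register (0x7fffffff + 1 wraps to 0x80000000), but a 64-bit check on the sign-extended values would not see the pair as adjacent. A small demonstration, relying on the usual two's-complement wrap of uint32_t:

#include <cassert>
#include <cstdint>

void csincAdjacencyCheck() {
  const int64_t TrueVal = INT32_MAX;   // sign-extended 64-bit views of the constants
  const int64_t FalseVal = INT32_MIN;
  const uint32_t TrueVal32 = static_cast<uint32_t>(TrueVal);
  const uint32_t FalseVal32 = static_cast<uint32_t>(FalseVal);

  // 64-bit arithmetic does not consider the pair adjacent...
  assert(TrueVal + 1 != FalseVal && TrueVal != FalseVal + 1);
  // ...but in 32-bit arithmetic the increment wraps around, so CSINC applies.
  assert(TrueVal32 == FalseVal32 + 1 || TrueVal32 + 1 == FalseVal32);
}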
5330
5331SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
5332 SelectionDAG &DAG) const {
5333 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5334 SDValue LHS = Op.getOperand(0);
5335 SDValue RHS = Op.getOperand(1);
5336 SDValue TVal = Op.getOperand(2);
5337 SDValue FVal = Op.getOperand(3);
5338 SDLoc DL(Op);
5339 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5340}
5341
5342SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
5343 SelectionDAG &DAG) const {
5344 SDValue CCVal = Op->getOperand(0);
5345 SDValue TVal = Op->getOperand(1);
5346 SDValue FVal = Op->getOperand(2);
5347 SDLoc DL(Op);
5348
5349 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
5350 // instruction.
5351 if (isOverflowIntrOpRes(CCVal)) {
5352 // Only lower legal XALUO ops.
5353 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
5354 return SDValue();
5355
5356 AArch64CC::CondCode OFCC;
5357 SDValue Value, Overflow;
5358 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
5359 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
5360
5361 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
5362 CCVal, Overflow);
5363 }
5364
5365 // Lower it the same way as we would lower a SELECT_CC node.
5366 ISD::CondCode CC;
5367 SDValue LHS, RHS;
5368 if (CCVal.getOpcode() == ISD::SETCC) {
5369 LHS = CCVal.getOperand(0);
5370 RHS = CCVal.getOperand(1);
5371 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
5372 } else {
5373 LHS = CCVal;
5374 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
5375 CC = ISD::SETNE;
5376 }
5377 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5378}
5379
5380SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
5381 SelectionDAG &DAG) const {
5382 // Jump table entries as PC relative offsets. No additional tweaking
5383 // is necessary here. Just get the address of the jump table.
5384 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
5385
5386 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5387 !Subtarget->isTargetMachO()) {
5388 return getAddrLarge(JT, DAG);
5389 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5390 return getAddrTiny(JT, DAG);
5391 }
5392 return getAddr(JT, DAG);
5393}
5394
5395SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
5396 SelectionDAG &DAG) const {
5397 // Jump table entries as PC relative offsets. No additional tweaking
5398 // is necessary here. Just get the address of the jump table.
5399 SDLoc DL(Op);
5400 SDValue JT = Op.getOperand(1);
5401 SDValue Entry = Op.getOperand(2);
5402 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
5403
5404 SDNode *Dest =
5405 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
5406 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
5407 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
5408 SDValue(Dest, 0));
5409}
5410
5411SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
5412 SelectionDAG &DAG) const {
5413 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
5414
5415 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
5416 // Use the GOT for the large code model on iOS.
5417 if (Subtarget->isTargetMachO()) {
5418 return getGOT(CP, DAG);
5419 }
5420 return getAddrLarge(CP, DAG);
5421 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5422 return getAddrTiny(CP, DAG);
5423 } else {
5424 return getAddr(CP, DAG);
5425 }
5426}
5427
5428SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
5429 SelectionDAG &DAG) const {
5430 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
5431 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5432 !Subtarget->isTargetMachO()) {
5433 return getAddrLarge(BA, DAG);
5434 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5435 return getAddrTiny(BA, DAG);
5436 }
5437 return getAddr(BA, DAG);
5438}
5439
5440SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
5441 SelectionDAG &DAG) const {
5442 AArch64FunctionInfo *FuncInfo =
5443 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5444
5445 SDLoc DL(Op);
5446 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
5447 getPointerTy(DAG.getDataLayout()));
5448 FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
5449 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5450 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5451 MachinePointerInfo(SV));
5452}
5453
5454SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
5455 SelectionDAG &DAG) const {
5456 AArch64FunctionInfo *FuncInfo =
5457 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5458
5459 SDLoc DL(Op);
5460 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
5461 ? FuncInfo->getVarArgsGPRIndex()
5462 : FuncInfo->getVarArgsStackIndex(),
5463 getPointerTy(DAG.getDataLayout()));
5464 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5465 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5466 MachinePointerInfo(SV));
5467}
5468
5469SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
5470 SelectionDAG &DAG) const {
5471 // The layout of the va_list struct is specified in the AArch64 Procedure Call
5472 // Standard, section B.3.
5473 MachineFunction &MF = DAG.getMachineFunction();
5474 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5475 auto PtrVT = getPointerTy(DAG.getDataLayout());
5476 SDLoc DL(Op);
5477
5478 SDValue Chain = Op.getOperand(0);
5479 SDValue VAList = Op.getOperand(1);
5480 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5481 SmallVector<SDValue, 4> MemOps;
5482
5483 // void *__stack at offset 0
5484 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
5485 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
5486 MachinePointerInfo(SV), /* Alignment = */ 8));
5487
5488 // void *__gr_top at offset 8
5489 int GPRSize = FuncInfo->getVarArgsGPRSize();
5490 if (GPRSize > 0) {
5491 SDValue GRTop, GRTopAddr;
5492
5493 GRTopAddr =
5494 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
5495
5496 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
5497 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
5498 DAG.getConstant(GPRSize, DL, PtrVT));
5499
5500 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
5501 MachinePointerInfo(SV, 8),
5502 /* Alignment = */ 8));
5503 }
5504
5505 // void *__vr_top at offset 16
5506 int FPRSize = FuncInfo->getVarArgsFPRSize();
5507 if (FPRSize > 0) {
5508 SDValue VRTop, VRTopAddr;
5509 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5510 DAG.getConstant(16, DL, PtrVT));
5511
5512 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
5513 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
5514 DAG.getConstant(FPRSize, DL, PtrVT));
5515
5516 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
5517 MachinePointerInfo(SV, 16),
5518 /* Alignment = */ 8));
5519 }
5520
5521 // int __gr_offs at offset 24
5522 SDValue GROffsAddr =
5523 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
5524 MemOps.push_back(DAG.getStore(
5525 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
5526 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
5527
5528 // int __vr_offs at offset 28
5529 SDValue VROffsAddr =
5530 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
5531 MemOps.push_back(DAG.getStore(
5532 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
5533 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
5534
5535 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5536}
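The stores in LowerAAPCS_VASTART above populate the five fields of the AAPCS64 va_list at offsets 0, 8, 16, 24 and 28 (32 bytes total). A sketch of the equivalent C++ struct, with field names shadowing the ABI's __stack/__gr_top/__vr_top/__gr_offs/__vr_offs and offsets that assume an LP64 target, matching the constants used in the lowering:

#include <cstddef>
#include <cstdint>

// Model of the AAPCS64 va_list populated by LowerAAPCS_VASTART.
struct AArch64VaList {
  void *stack;      // offset 0: next stacked argument
  void *gr_top;     // offset 8: end of the saved general-purpose register area
  void *vr_top;     // offset 16: end of the saved FP/SIMD register area
  int32_t gr_offs;  // offset 24: negative offset from gr_top to the next GPR arg
  int32_t vr_offs;  // offset 28: negative offset from vr_top to the next FP/SIMD arg
};

static_assert(offsetof(AArch64VaList, gr_top) == 8, "store at offset 8");
static_assert(offsetof(AArch64VaList, vr_top) == 16, "store at offset 16");
static_assert(offsetof(AArch64VaList, gr_offs) == 24, "store at offset 24");
static_assert(offsetof(AArch64VaList, vr_offs) == 28, "store at offset 28");
static_assert(sizeof(AArch64VaList) == 32, "three pointers plus two ints");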
5537
5538SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
5539 SelectionDAG &DAG) const {
5540 MachineFunction &MF = DAG.getMachineFunction();
5541
5542 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
5543 return LowerWin64_VASTART(Op, DAG);
5544 else if (Subtarget->isTargetDarwin())
5545 return LowerDarwin_VASTART(Op, DAG);
5546 else
5547 return LowerAAPCS_VASTART(Op, DAG);
5548}
5549
5550SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
5551 SelectionDAG &DAG) const {
5552   // AAPCS has three pointers and two ints (= 32 bytes), Darwin has a single
5553   // pointer.
5554 SDLoc DL(Op);
5555 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
5556 unsigned VaListSize = (Subtarget->isTargetDarwin() ||
5557 Subtarget->isTargetWindows()) ? PtrSize : 32;
5558 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
5559 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
5560
5561 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
5562 DAG.getConstant(VaListSize, DL, MVT::i32), PtrSize,
5563 false, false, false, MachinePointerInfo(DestSV),
5564 MachinePointerInfo(SrcSV));
5565}
5566
5567SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
5568   assert(Subtarget->isTargetDarwin() &&
5569          "automatic va_arg instruction only works on Darwin");
5570
5571 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5572 EVT VT = Op.getValueType();
5573 SDLoc DL(Op);
5574 SDValue Chain = Op.getOperand(0);
5575 SDValue Addr = Op.getOperand(1);
5576 unsigned Align = Op.getConstantOperandVal(3);
5577 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
5578 auto PtrVT = getPointerTy(DAG.getDataLayout());
5579 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
5580 SDValue VAList =
5581 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
5582 Chain = VAList.getValue(1);
5583 VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
5584
5585 if (Align > MinSlotSize) {
5586     assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
5587 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5588 DAG.getConstant(Align - 1, DL, PtrVT));
5589 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
5590 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
5591 }
5592
5593 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
5594 unsigned ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
5595
5596 // Scalar integer and FP values smaller than 64 bits are implicitly extended
5597 // up to 64 bits. At the very least, we have to increase the striding of the
5598 // vaargs list to match this, and for FP values we need to introduce
5599 // FP_ROUND nodes as well.
5600 if (VT.isInteger() && !VT.isVector())
5601 ArgSize = std::max(ArgSize, MinSlotSize);
5602 bool NeedFPTrunc = false;
5603 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
5604 ArgSize = 8;
5605 NeedFPTrunc = true;
5606 }
5607
5608 // Increment the pointer, VAList, to the next vaarg
5609 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5610 DAG.getConstant(ArgSize, DL, PtrVT));
5611 VANext = DAG.getZExtOrTrunc(VANext, DL, PtrMemVT);
5612
5613 // Store the incremented VAList to the legalized pointer
5614 SDValue APStore =
5615 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
5616
5617 // Load the actual argument out of the pointer VAList
5618 if (NeedFPTrunc) {
5619 // Load the value as an f64.
5620 SDValue WideFP =
5621 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
5622 // Round the value down to an f32.
5623 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
5624 DAG.getIntPtrConstant(1, DL));
5625 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
5626 // Merge the rounded value with the chain output of the load.
5627 return DAG.getMergeValues(Ops, DL);
5628 }
5629
5630 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
5631}
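When the requested alignment exceeds the minimum slot size, LowerVAARG above rounds the va_list pointer up with the usual add-then-mask idiom (the ADD of Align - 1 followed by the AND with -Align). A small model of that computation, assuming Align is a power of two as the assert requires (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// Round VAList up to the next multiple of Align (a power of two), mirroring the
// ADD/AND pair emitted in LowerVAARG.
uint64_t alignVaListSlot(uint64_t VAList, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
  return (VAList + Align - 1) & ~(Align - 1); // same value as AND with -(int64_t)Align
}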
5632
5633SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
5634 SelectionDAG &DAG) const {
5635 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5636 MFI.setFrameAddressIsTaken(true);
5637
5638 EVT VT = Op.getValueType();
5639 SDLoc DL(Op);
5640 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5641 SDValue FrameAddr =
5642 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, MVT::i64);
5643 while (Depth--)
5644 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
5645 MachinePointerInfo());
5646
5647 if (Subtarget->isTargetILP32())
5648 FrameAddr = DAG.getNode(ISD::AssertZext, DL, MVT::i64, FrameAddr,
5649 DAG.getValueType(VT));
5650
5651 return FrameAddr;
5652}
5653
5654SDValue AArch64TargetLowering::LowerSPONENTRY(SDValue Op,
5655 SelectionDAG &DAG) const {
5656 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5657
5658 EVT VT = getPointerTy(DAG.getDataLayout());
5659 SDLoc DL(Op);
5660 int FI = MFI.CreateFixedObject(4, 0, false);
5661 return DAG.getFrameIndex(FI, VT);
5662}
5663
5664#define GET_REGISTER_MATCHER
5665#include "AArch64GenAsmMatcher.inc"
5666
5667// FIXME? Maybe this could be a TableGen attribute on some registers and
5668// this table could be generated automatically from RegInfo.
5669Register AArch64TargetLowering::
5670getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const {
5671 Register Reg = MatchRegisterName(RegName);
5672 if (AArch64::X1 <= Reg && Reg <= AArch64::X28) {
5673 const MCRegisterInfo *MRI = Subtarget->getRegisterInfo();
5674 unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
5675 if (!Subtarget->isXRegisterReserved(DwarfRegNum))
5676 Reg = 0;
5677 }
5678 if (Reg)
5679 return Reg;
5680 report_fatal_error(Twine("Invalid register name \""
5681 + StringRef(RegName) + "\"."));
5682}
5683
5684SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
5685 SelectionDAG &DAG) const {
5686 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
5687
5688 EVT VT = Op.getValueType();
5689 SDLoc DL(Op);
5690
5691 SDValue FrameAddr =
5692 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5693 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5694
5695 return DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset);
5696}
5697
5698SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
5699 SelectionDAG &DAG) const {
5700 MachineFunction &MF = DAG.getMachineFunction();
5701 MachineFrameInfo &MFI = MF.getFrameInfo();
5702 MFI.setReturnAddressIsTaken(true);
5703
5704 EVT VT = Op.getValueType();
5705 SDLoc DL(Op);
5706 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5707 if (Depth) {
5708 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5709 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5710 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
5711 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
5712 MachinePointerInfo());
5713 }
5714
5715 // Return LR, which contains the return address. Mark it an implicit live-in.
5716 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
5717 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
5718}
5719
5720 /// LowerShiftRightParts - Lower SRA_PARTS, which returns two
5721 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
5722SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
5723 SelectionDAG &DAG) const {
5724   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5725 EVT VT = Op.getValueType();
5726 unsigned VTBits = VT.getSizeInBits();
5727 SDLoc dl(Op);
5728 SDValue ShOpLo = Op.getOperand(0);
5729 SDValue ShOpHi = Op.getOperand(1);
5730 SDValue ShAmt = Op.getOperand(2);
5731 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5732
5733   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5734
5735 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5736 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5737 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5738
5739 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
5740 // is "undef". We wanted 0, so CSEL it directly.
5741 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5742 ISD::SETEQ, dl, DAG);
5743 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5744 HiBitsForLo =
5745 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5746 HiBitsForLo, CCVal, Cmp);
5747
5748 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5749 DAG.getConstant(VTBits, dl, MVT::i64));
5750
5751 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5752 SDValue LoForNormalShift =
5753 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
5754
5755 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5756 dl, DAG);
5757 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5758 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5759 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5760 LoForNormalShift, CCVal, Cmp);
5761
5762 // AArch64 shifts larger than the register width are wrapped rather than
5763 // clamped, so we can't just emit "hi >> x".
5764 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5765 SDValue HiForBigShift =
5766 Opc == ISD::SRA
5767 ? DAG.getNode(Opc, dl, VT, ShOpHi,
5768 DAG.getConstant(VTBits - 1, dl, MVT::i64))
5769 : DAG.getConstant(0, dl, VT);
5770 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5771 HiForNormalShift, CCVal, Cmp);
5772
5773 SDValue Ops[2] = { Lo, Hi };
5774 return DAG.getMergeValues(Ops, dl);
5775}
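The CSELs in LowerShiftRightParts above exist because a 64-bit AArch64 shift by 64 or more wraps the shift amount rather than yielding zero, and the reverse shift amount 64 - ShAmt is itself 64 (undefined in the DAG) when ShAmt is 0. A scalar model of the SRL_PARTS case (logical shift of a 128-bit value held in two i64 halves), written to avoid those out-of-range shifts the same way the lowering does; the function name is illustrative:

#include <cstdint>
#include <utility>

// Shift the 128-bit value {Hi,Lo} right logically by ShAmt (0 <= ShAmt < 128),
// returning {Lo, Hi}. The branches play the role of the CSELs: never shift a
// 64-bit value by 64 or more.
std::pair<uint64_t, uint64_t> srlParts(uint64_t Lo, uint64_t Hi, unsigned ShAmt) {
  if (ShAmt == 0)
    return {Lo, Hi};                // HiBitsForLo would be SHL by 64: CSEL picks 0
  if (ShAmt >= 64)
    return {Hi >> (ShAmt - 64), 0}; // "big shift": Lo comes from Hi, Hi becomes 0
  uint64_t NewLo = (Lo >> ShAmt) | (Hi << (64 - ShAmt));
  uint64_t NewHi = Hi >> ShAmt;     // "normal shift" path
  return {NewLo, NewHi};
}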
5776
5777 /// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5778 /// i64 values and takes a 2 x i64 value to shift plus a shift amount.
5779SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
5780 SelectionDAG &DAG) const {
5781   assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5782 EVT VT = Op.getValueType();
5783 unsigned VTBits = VT.getSizeInBits();
5784 SDLoc dl(Op);
5785 SDValue ShOpLo = Op.getOperand(0);
5786 SDValue ShOpHi = Op.getOperand(1);
5787 SDValue ShAmt = Op.getOperand(2);
5788
5789   assert(Op.getOpcode() == ISD::SHL_PARTS);
5790 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5791 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5792 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5793
5794 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
5795 // is "undef". We wanted 0, so CSEL it directly.
5796 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5797 ISD::SETEQ, dl, DAG);
5798 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5799 LoBitsForHi =
5800 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5801 LoBitsForHi, CCVal, Cmp);
5802
5803 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5804 DAG.getConstant(VTBits, dl, MVT::i64));
5805 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5806 SDValue HiForNormalShift =
5807 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
5808
5809 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5810
5811 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5812 dl, DAG);
5813 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5814 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5815 HiForNormalShift, CCVal, Cmp);
5816
5817 // AArch64 shifts of larger than register sizes are wrapped rather than
5818 // clamped, so we can't just emit "lo << a" if a is too big.
5819 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
5820 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5821 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5822 LoForNormalShift, CCVal, Cmp);
5823
5824 SDValue Ops[2] = { Lo, Hi };
5825 return DAG.getMergeValues(Ops, dl);
5826}
5827
5828bool AArch64TargetLowering::isOffsetFoldingLegal(
5829 const GlobalAddressSDNode *GA) const {
5830 // Offsets are folded in the DAG combine rather than here so that we can
5831 // intelligently choose an offset based on the uses.
5832 return false;
5833}
5834
5835bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5836 bool OptForSize) const {
5837 bool IsLegal = false;
5838 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and
5839 // 16-bit case when target has full fp16 support.
5840 // FIXME: We should be able to handle f128 as well with a clever lowering.
5841 const APInt ImmInt = Imm.bitcastToAPInt();
5842 if (VT == MVT::f64)
5843 IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
5844 else if (VT == MVT::f32)
5845 IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
5846 else if (VT == MVT::f16 && Subtarget->hasFullFP16())
5847 IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero();
5848   // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to
5849   // generate that fmov.
5850
5851   // If we cannot materialize the value in the fmov immediate field, check if
5852   // the value can be encoded as the immediate operand of a logical instruction.
5853   // The immediate value will be created with either MOVZ, MOVN, or ORR.
5854   if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) {
5855     // The cost is actually exactly the same for mov+fmov vs. adrp+ldr;
5856     // however the mov+fmov sequence is always better because of the reduced
5857     // cache pressure. The timings are still the same if you consider
5858     // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the
5859     // movw+movk pair is fused). So we limit the expansion to at most two instructions.
5860 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
5861 AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(),
5862 Insn);
5863 unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2));
5864 IsLegal = Insn.size() <= Limit;
5865 }
5866
5867   LLVM_DEBUG(dbgs() << (IsLegal ? "Legal " : "Illegal ") << VT.getEVTString()
5868                     << " imm value: "; Imm.dump(););
5869 return IsLegal;
5870}
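isFPImmLegal above accepts a constant when it fits the fmov floating-point immediate encoding. As a rough model (the exact encodable set is an assumption here: roughly +/- n/16 * 2^r with n in [16, 31] and r in [-3, 4]), the brute-force check below illustrates why constants such as 1.0, 0.5 or -2.5 are cheap while an arbitrary value like 0.1 falls back to the MOVZ/MOVN/ORR plus fmov path:

#include <cmath>

// Rough, assumed model of the FMOV floating-point immediate form. Zero is not
// part of the immediate form; the lowering handles +0.0 separately via fmov
// from XZR, as the comments above note.
bool looksLikeFmovImmediate(double V) {
  if (V == 0.0 || !std::isfinite(V))
    return false;
  double Mag = std::fabs(V);
  for (int R = -3; R <= 4; ++R)
    for (int N = 16; N <= 31; ++N)
      if (Mag == (static_cast<double>(N) / 16.0) * std::ldexp(1.0, R))
        return true;
  return false;
}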
5871
5872//===----------------------------------------------------------------------===//
5873// AArch64 Optimization Hooks
5874//===----------------------------------------------------------------------===//
5875
5876static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
5877 SDValue Operand, SelectionDAG &DAG,
5878 int &ExtraSteps) {
5879 EVT VT = Operand.getValueType();
5880 if (ST->hasNEON() &&
5881 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
5882 VT == MVT::f32 || VT == MVT::v1f32 ||
5883 VT == MVT::v2f32 || VT == MVT::v4f32)) {
5884 if