Bug Summary

File: llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1149, column 10
Called C++ object pointer is null
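
This diagnostic comes from the analyzer's core null-dereference checker: along at least one path through the analyzed file (AArch64ISelLowering.cpp, listed below), a member function is invoked through a pointer the analyzer considers null, and the call itself sits in inlined header code at SelectionDAGNodes.h:1149. The sketch below is a minimal, hypothetical illustration of that defect class only; Inner, Wrapper, and getWidth are made-up names, not the actual LLVM code path.

// Minimal sketch of the "Called C++ object pointer is null" pattern.
// All names here are illustrative stand-ins, not LLVM code.
#include <cstdio>

struct Inner {
  int getWidth() const { return 64; }
};

struct Wrapper {
  Inner *I = nullptr;                              // may be null on some paths
  int getWidth() const { return I->getWidth(); }   // inline accessor, as in a header
};

int query(const Wrapper *W) {
  if (!W)                  // the outer pointer is checked...
    return 0;
  return W->getWidth();    // ...but W->I may still be null; the analyzer reports
                           // the call through I inside the inline accessor.
}

int main() {
  Inner In;
  Wrapper Good;
  Good.I = &In;
  std::printf("%d\n", query(&Good));  // fine: I is non-null on this path
  Wrapper Bad;                        // Bad.I stays null; query(&Bad) would crash
  (void)Bad;                          // exactly where the analyzer flags the call
  return 0;
}

The warning location being in the header while the analyzed file is the AArch64 lowering source suggests the problematic path starts in one of the routines listed below and only dereferences the null pointer once it reaches the header's inline code.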

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/StringSwitch.h"
31#include "llvm/ADT/Triple.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/CallingConvLower.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineInstr.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/IntrinsicInst.h"
60#include "llvm/IR/Intrinsics.h"
61#include "llvm/IR/IntrinsicsAArch64.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/OperandTraits.h"
64#include "llvm/IR/PatternMatch.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/Use.h"
67#include "llvm/IR/Value.h"
68#include "llvm/MC/MCRegisterInfo.h"
69#include "llvm/Support/Casting.h"
70#include "llvm/Support/CodeGen.h"
71#include "llvm/Support/CommandLine.h"
72#include "llvm/Support/Compiler.h"
73#include "llvm/Support/Debug.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/KnownBits.h"
76#include "llvm/Support/MachineValueType.h"
77#include "llvm/Support/MathExtras.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Target/TargetMachine.h"
80#include "llvm/Target/TargetOptions.h"
81#include <algorithm>
82#include <bitset>
83#include <cassert>
84#include <cctype>
85#include <cstdint>
86#include <cstdlib>
87#include <iterator>
88#include <limits>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace llvm::PatternMatch;
95
96#define DEBUG_TYPE "aarch64-lower"
97
98STATISTIC(NumTailCalls, "Number of tail calls");
99STATISTIC(NumShiftInserts, "Number of vector shift inserts");
100STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
101
102static cl::opt<bool>
103EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
104 cl::desc("Allow AArch64 SLI/SRI formation"),
105 cl::init(false));
106
107// FIXME: The necessary dtprel relocations don't seem to be supported
108// well in the GNU bfd and gold linkers at the moment. Therefore, by
109// default, for now, fall back to GeneralDynamic code generation.
110cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
111 "aarch64-elf-ldtls-generation", cl::Hidden,
112 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
113 cl::init(false));
114
115static cl::opt<bool>
116EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
117 cl::desc("Enable AArch64 logical imm instruction "
118 "optimization"),
119 cl::init(true));
120
121/// Value type used for condition codes.
122static const MVT MVT_CC = MVT::i32;
123
124AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
125 const AArch64Subtarget &STI)
126 : TargetLowering(TM), Subtarget(&STI) {
127 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
128 // we have to make something up. Arbitrarily, choose ZeroOrOne.
129 setBooleanContents(ZeroOrOneBooleanContent);
130 // When comparing vectors the result sets the different elements in the
131 // vector to all-one or all-zero.
132 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
133
134 // Set up the register classes.
135 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
136 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
137
138 if (Subtarget->hasFPARMv8()) {
139 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
140 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
141 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
142 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
143 }
144
145 if (Subtarget->hasNEON()) {
146 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
147 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
148 // Someone set us up the NEON.
149 addDRTypeForNEON(MVT::v2f32);
150 addDRTypeForNEON(MVT::v8i8);
151 addDRTypeForNEON(MVT::v4i16);
152 addDRTypeForNEON(MVT::v2i32);
153 addDRTypeForNEON(MVT::v1i64);
154 addDRTypeForNEON(MVT::v1f64);
155 addDRTypeForNEON(MVT::v4f16);
156
157 addQRTypeForNEON(MVT::v4f32);
158 addQRTypeForNEON(MVT::v2f64);
159 addQRTypeForNEON(MVT::v16i8);
160 addQRTypeForNEON(MVT::v8i16);
161 addQRTypeForNEON(MVT::v4i32);
162 addQRTypeForNEON(MVT::v2i64);
163 addQRTypeForNEON(MVT::v8f16);
164 }
165
166 if (Subtarget->hasSVE()) {
167 // Add legal sve predicate types
168 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
169 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
170 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
171 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
172
173 // Add legal sve data types
174 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
175 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
176 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
177 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
178
179 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
180 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
181 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
182 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
183 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
184 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
185
186 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
187 setOperationAction(ISD::SADDSAT, VT, Legal);
188 setOperationAction(ISD::UADDSAT, VT, Legal);
189 setOperationAction(ISD::SSUBSAT, VT, Legal);
190 setOperationAction(ISD::USUBSAT, VT, Legal);
191 setOperationAction(ISD::SMAX, VT, Legal);
192 setOperationAction(ISD::UMAX, VT, Legal);
193 setOperationAction(ISD::SMIN, VT, Legal);
194 setOperationAction(ISD::UMIN, VT, Legal);
195 }
196
197 for (auto VT :
198 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
199 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
200 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
201 }
202
203 // Compute derived properties from the register classes
204 computeRegisterProperties(Subtarget->getRegisterInfo());
205
206 // Provide all sorts of operation actions
207 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
208 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
209 setOperationAction(ISD::SETCC, MVT::i32, Custom);
210 setOperationAction(ISD::SETCC, MVT::i64, Custom);
211 setOperationAction(ISD::SETCC, MVT::f16, Custom);
212 setOperationAction(ISD::SETCC, MVT::f32, Custom);
213 setOperationAction(ISD::SETCC, MVT::f64, Custom);
214 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
215 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
216 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
217 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
218 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
219 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
220 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
221 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
222 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
223 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
224 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
225 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
226 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
227 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
228 setOperationAction(ISD::SELECT, MVT::i32, Custom);
229 setOperationAction(ISD::SELECT, MVT::i64, Custom);
230 setOperationAction(ISD::SELECT, MVT::f16, Custom);
231 setOperationAction(ISD::SELECT, MVT::f32, Custom);
232 setOperationAction(ISD::SELECT, MVT::f64, Custom);
233 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
234 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
235 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
236 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
237 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
238 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
239 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
240
241 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
242 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
243 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
244
245 setOperationAction(ISD::FREM, MVT::f32, Expand);
246 setOperationAction(ISD::FREM, MVT::f64, Expand);
247 setOperationAction(ISD::FREM, MVT::f80, Expand);
248
249 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
250
251 // Custom lowering hooks are needed for XOR
252 // to fold it into CSINC/CSINV.
253 setOperationAction(ISD::XOR, MVT::i32, Custom);
254 setOperationAction(ISD::XOR, MVT::i64, Custom);
255
256 // Virtually no operation on f128 is legal, but LLVM can't expand them when
257 // there's a valid register class, so we need custom operations in most cases.
258 setOperationAction(ISD::FABS, MVT::f128, Expand);
259 setOperationAction(ISD::FADD, MVT::f128, Custom);
260 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
261 setOperationAction(ISD::FCOS, MVT::f128, Expand);
262 setOperationAction(ISD::FDIV, MVT::f128, Custom);
263 setOperationAction(ISD::FMA, MVT::f128, Expand);
264 setOperationAction(ISD::FMUL, MVT::f128, Custom);
265 setOperationAction(ISD::FNEG, MVT::f128, Expand);
266 setOperationAction(ISD::FPOW, MVT::f128, Expand);
267 setOperationAction(ISD::FREM, MVT::f128, Expand);
268 setOperationAction(ISD::FRINT, MVT::f128, Expand);
269 setOperationAction(ISD::FSIN, MVT::f128, Expand);
270 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
271 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
272 setOperationAction(ISD::FSUB, MVT::f128, Custom);
273 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
274 setOperationAction(ISD::SETCC, MVT::f128, Custom);
275 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
276 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
277 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
278 setOperationAction(ISD::SELECT, MVT::f128, Custom);
279 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
280 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
281
282 // Lowering for many of the conversions is actually specified by the non-f128
283 // type. The LowerXXX function will be trivial when f128 isn't involved.
284 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
285 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
286 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
287 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
288 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
289 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
290 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
291 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
292 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
293 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
294 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
295 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
296 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
297 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
298 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
299 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
300 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
301 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
302 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
303 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
304 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
305 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
306 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
307 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
308 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
309 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
310 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
311 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
312
313 // Variable arguments.
314 setOperationAction(ISD::VASTART, MVT::Other, Custom);
315 setOperationAction(ISD::VAARG, MVT::Other, Custom);
316 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
317 setOperationAction(ISD::VAEND, MVT::Other, Expand);
318
319 // Variable-sized objects.
320 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
321 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
322
323 if (Subtarget->isTargetWindows())
324 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
325 else
326 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
327
328 // Constant pool entries
329 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
330
331 // BlockAddress
332 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
333
334 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
335 setOperationAction(ISD::ADDC, MVT::i32, Custom);
336 setOperationAction(ISD::ADDE, MVT::i32, Custom);
337 setOperationAction(ISD::SUBC, MVT::i32, Custom);
338 setOperationAction(ISD::SUBE, MVT::i32, Custom);
339 setOperationAction(ISD::ADDC, MVT::i64, Custom);
340 setOperationAction(ISD::ADDE, MVT::i64, Custom);
341 setOperationAction(ISD::SUBC, MVT::i64, Custom);
342 setOperationAction(ISD::SUBE, MVT::i64, Custom);
343
344 // AArch64 lacks both left-rotate and popcount instructions.
345 setOperationAction(ISD::ROTL, MVT::i32, Expand);
346 setOperationAction(ISD::ROTL, MVT::i64, Expand);
347 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
348 setOperationAction(ISD::ROTL, VT, Expand);
349 setOperationAction(ISD::ROTR, VT, Expand);
350 }
351
352 // AArch64 doesn't have {U|S}MUL_LOHI.
353 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
354 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
355
356 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
357 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
358
359 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
360 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
361 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
362 setOperationAction(ISD::SDIVREM, VT, Expand);
363 setOperationAction(ISD::UDIVREM, VT, Expand);
364 }
365 setOperationAction(ISD::SREM, MVT::i32, Expand);
366 setOperationAction(ISD::SREM, MVT::i64, Expand);
367 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
368 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
369 setOperationAction(ISD::UREM, MVT::i32, Expand);
370 setOperationAction(ISD::UREM, MVT::i64, Expand);
371
372 // Custom lower Add/Sub/Mul with overflow.
373 setOperationAction(ISD::SADDO, MVT::i32, Custom);
374 setOperationAction(ISD::SADDO, MVT::i64, Custom);
375 setOperationAction(ISD::UADDO, MVT::i32, Custom);
376 setOperationAction(ISD::UADDO, MVT::i64, Custom);
377 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
378 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
379 setOperationAction(ISD::USUBO, MVT::i32, Custom);
380 setOperationAction(ISD::USUBO, MVT::i64, Custom);
381 setOperationAction(ISD::SMULO, MVT::i32, Custom);
382 setOperationAction(ISD::SMULO, MVT::i64, Custom);
383 setOperationAction(ISD::UMULO, MVT::i32, Custom);
384 setOperationAction(ISD::UMULO, MVT::i64, Custom);
385
386 setOperationAction(ISD::FSIN, MVT::f32, Expand);
387 setOperationAction(ISD::FSIN, MVT::f64, Expand);
388 setOperationAction(ISD::FCOS, MVT::f32, Expand);
389 setOperationAction(ISD::FCOS, MVT::f64, Expand);
390 setOperationAction(ISD::FPOW, MVT::f32, Expand);
391 setOperationAction(ISD::FPOW, MVT::f64, Expand);
392 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
393 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
394 if (Subtarget->hasFullFP16())
395 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
396 else
397 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
398
399 setOperationAction(ISD::FREM, MVT::f16, Promote);
400 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
401 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
402 setOperationAction(ISD::FPOW, MVT::f16, Promote);
403 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
404 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
405 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
406 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
407 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
408 setOperationAction(ISD::FCOS, MVT::f16, Promote);
409 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
410 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
411 setOperationAction(ISD::FSIN, MVT::f16, Promote);
412 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
413 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
414 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
415 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
416 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
417 setOperationAction(ISD::FEXP, MVT::f16, Promote);
418 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
419 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
420 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
421 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
422 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
423 setOperationAction(ISD::FLOG, MVT::f16, Promote);
424 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
425 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
426 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
427 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
428 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
429 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
430 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
431 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
432
433 if (!Subtarget->hasFullFP16()) {
434 setOperationAction(ISD::SELECT, MVT::f16, Promote);
435 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
436 setOperationAction(ISD::SETCC, MVT::f16, Promote);
437 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
438 setOperationAction(ISD::FADD, MVT::f16, Promote);
439 setOperationAction(ISD::FSUB, MVT::f16, Promote);
440 setOperationAction(ISD::FMUL, MVT::f16, Promote);
441 setOperationAction(ISD::FDIV, MVT::f16, Promote);
442 setOperationAction(ISD::FMA, MVT::f16, Promote);
443 setOperationAction(ISD::FNEG, MVT::f16, Promote);
444 setOperationAction(ISD::FABS, MVT::f16, Promote);
445 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
446 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
447 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
448 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
449 setOperationAction(ISD::FRINT, MVT::f16, Promote);
450 setOperationAction(ISD::FROUND, MVT::f16, Promote);
451 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
452 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
453 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
454 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
455 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
456
457 // promote v4f16 to v4f32 when that is known to be safe.
458 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
459 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
460 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
461 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
462 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
463 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
464 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
465 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
466
467 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
468 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
469 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
470 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
471 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
472 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
473 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
474 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
475 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
476 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
477 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
478 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
479 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
480 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
481 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
482
483 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
484 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
485 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
486 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
487 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
488 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
489 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
490 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
491 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
492 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
493 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
494 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
495 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
496 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
497 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
498 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
499 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
500 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
501 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
502 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
503 }
504
505 // AArch64 has implementations of a lot of rounding-like FP operations.
506 for (MVT Ty : {MVT::f32, MVT::f64}) {
507 setOperationAction(ISD::FFLOOR, Ty, Legal);
508 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
509 setOperationAction(ISD::FCEIL, Ty, Legal);
510 setOperationAction(ISD::FRINT, Ty, Legal);
511 setOperationAction(ISD::FTRUNC, Ty, Legal);
512 setOperationAction(ISD::FROUND, Ty, Legal);
513 setOperationAction(ISD::FMINNUM, Ty, Legal);
514 setOperationAction(ISD::FMAXNUM, Ty, Legal);
515 setOperationAction(ISD::FMINIMUM, Ty, Legal);
516 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
517 setOperationAction(ISD::LROUND, Ty, Legal);
518 setOperationAction(ISD::LLROUND, Ty, Legal);
519 setOperationAction(ISD::LRINT, Ty, Legal);
520 setOperationAction(ISD::LLRINT, Ty, Legal);
521 }
522
523 if (Subtarget->hasFullFP16()) {
524 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
525 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
526 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
527 setOperationAction(ISD::FRINT, MVT::f16, Legal);
528 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
529 setOperationAction(ISD::FROUND, MVT::f16, Legal);
530 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
531 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
532 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
533 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
534 }
535
536 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
537
538 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
539
540 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
541 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
542 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
543 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
544 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
545
546 // 128-bit loads and stores can be done without expanding
547 setOperationAction(ISD::LOAD, MVT::i128, Custom);
548 setOperationAction(ISD::STORE, MVT::i128, Custom);
549
550 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
551 // custom lowering, as there are no un-paired non-temporal stores and
552 // legalization will break up 256 bit inputs.
553 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
554 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
555 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
556 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
557 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
558 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
559 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
560
561 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
562 // This requires the Performance Monitors extension.
563 if (Subtarget->hasPerfMon())
564 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
565
566 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
567 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
568 // Issue __sincos_stret if available.
569 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
570 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
571 } else {
572 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
573 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
574 }
575
576 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
577 // MSVCRT doesn't have powi; fall back to pow
578 setLibcallName(RTLIB::POWI_F32, nullptr);
579 setLibcallName(RTLIB::POWI_F64, nullptr);
580 }
581
582 // Make floating-point constants legal for the large code model, so they don't
583 // become loads from the constant pool.
584 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
585 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
586 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
587 }
588
589 // AArch64 does not have floating-point extending loads, i1 sign-extending
590 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
591 for (MVT VT : MVT::fp_valuetypes()) {
592 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
593 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
594 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
595 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
596 }
597 for (MVT VT : MVT::integer_valuetypes())
598 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
599
600 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
601 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
602 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
603 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
604 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
605 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
606 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
607
608 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
609 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
610
611 // Indexed loads and stores are supported.
612 for (unsigned im = (unsigned)ISD::PRE_INC;
613 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
614 setIndexedLoadAction(im, MVT::i8, Legal);
615 setIndexedLoadAction(im, MVT::i16, Legal);
616 setIndexedLoadAction(im, MVT::i32, Legal);
617 setIndexedLoadAction(im, MVT::i64, Legal);
618 setIndexedLoadAction(im, MVT::f64, Legal);
619 setIndexedLoadAction(im, MVT::f32, Legal);
620 setIndexedLoadAction(im, MVT::f16, Legal);
621 setIndexedStoreAction(im, MVT::i8, Legal);
622 setIndexedStoreAction(im, MVT::i16, Legal);
623 setIndexedStoreAction(im, MVT::i32, Legal);
624 setIndexedStoreAction(im, MVT::i64, Legal);
625 setIndexedStoreAction(im, MVT::f64, Legal);
626 setIndexedStoreAction(im, MVT::f32, Legal);
627 setIndexedStoreAction(im, MVT::f16, Legal);
628 }
629
630 // Trap.
631 setOperationAction(ISD::TRAP, MVT::Other, Legal);
632 if (Subtarget->isTargetWindows())
633 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
634
635 // We combine OR nodes for bitfield operations.
636 setTargetDAGCombine(ISD::OR);
637 // Try to create BICs for vector ANDs.
638 setTargetDAGCombine(ISD::AND);
639
640 // Vector add and sub nodes may conceal a high-half opportunity.
641 // Also, try to fold ADD into CSINC/CSINV..
642 setTargetDAGCombine(ISD::ADD);
643 setTargetDAGCombine(ISD::SUB);
644 setTargetDAGCombine(ISD::SRL);
645 setTargetDAGCombine(ISD::XOR);
646 setTargetDAGCombine(ISD::SINT_TO_FP);
647 setTargetDAGCombine(ISD::UINT_TO_FP);
648
649 setTargetDAGCombine(ISD::FP_TO_SINT);
650 setTargetDAGCombine(ISD::FP_TO_UINT);
651 setTargetDAGCombine(ISD::FDIV);
652
653 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
654
655 setTargetDAGCombine(ISD::ANY_EXTEND);
656 setTargetDAGCombine(ISD::ZERO_EXTEND);
657 setTargetDAGCombine(ISD::SIGN_EXTEND);
658 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
659 setTargetDAGCombine(ISD::CONCAT_VECTORS);
660 setTargetDAGCombine(ISD::STORE);
661 if (Subtarget->supportsAddressTopByteIgnored())
662 setTargetDAGCombine(ISD::LOAD);
663
664 setTargetDAGCombine(ISD::MUL);
665
666 setTargetDAGCombine(ISD::SELECT);
667 setTargetDAGCombine(ISD::VSELECT);
668
669 setTargetDAGCombine(ISD::INTRINSIC_VOID);
670 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
671 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
672
673 setTargetDAGCombine(ISD::GlobalAddress);
674
675 // In case of strict alignment, avoid an excessive number of byte wide stores.
676 MaxStoresPerMemsetOptSize = 8;
677 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
678 ? MaxStoresPerMemsetOptSize : 32;
679
680 MaxGluedStoresPerMemcpy = 4;
681 MaxStoresPerMemcpyOptSize = 4;
682 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
683 ? MaxStoresPerMemcpyOptSize : 16;
684
685 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
686
687 MaxLoadsPerMemcmpOptSize = 4;
688 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
689 ? MaxLoadsPerMemcmpOptSize : 8;
690
691 setStackPointerRegisterToSaveRestore(AArch64::SP);
692
693 setSchedulingPreference(Sched::Hybrid);
694
695 EnableExtLdPromotion = true;
696
697 // Set required alignment.
698 setMinFunctionAlignment(Align(4));
699 // Set preferred alignments.
700 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
701 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
702
703 // Only change the limit for entries in a jump table if specified by
704 // the sub target, but not at the command line.
705 unsigned MaxJT = STI.getMaximumJumpTableSize();
706 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
707 setMaximumJumpTableSize(MaxJT);
708
709 setHasExtractBitsInsn(true);
710
711 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
712
713 if (Subtarget->hasNEON()) {
714 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
715 // silliness like this:
716 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
717 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
718 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
719 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
720 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
721 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
722 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
723 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
724 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
725 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
726 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
727 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
728 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
729 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
730 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
731 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
732 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
733 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
734 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
735 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
736 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
737 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
738 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
739 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
740 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
741
742 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
743 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
744 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
745 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
746 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
747
748 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
749
750 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
751 // elements smaller than i32, so promote the input to i32 first.
752 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
753 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
754 // i8 vector elements also need promotion to i32 for v8i8
755 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
756 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
757 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
758 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
759 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
760 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
761 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
762 // Or, direct i32 -> f16 vector conversion. Set it so custom, so the
763 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
764 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
765 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
766
767 if (Subtarget->hasFullFP16()) {
768 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
769 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
770 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
771 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
772 } else {
773 // when AArch64 doesn't have fullfp16 support, promote the input
774 // to i32 first.
775 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
776 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
777 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
778 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
779 }
780
781 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
782 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
783
784 // AArch64 doesn't have MUL.2d:
785 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
786 // Custom handling for some quad-vector types to detect MULL.
787 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
788 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
789 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
790
791 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
792 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
793 // Vector reductions
794 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
795 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
796 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
797 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
798 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
799
800 // Saturates
801 setOperationAction(ISD::SADDSAT, VT, Legal);
802 setOperationAction(ISD::UADDSAT, VT, Legal);
803 setOperationAction(ISD::SSUBSAT, VT, Legal);
804 setOperationAction(ISD::USUBSAT, VT, Legal);
805 }
806 for (MVT VT : { MVT::v4f16, MVT::v2f32,
807 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
808 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
809 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
810 }
811
812 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
813 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
814 // Likewise, narrowing and extending vector loads/stores aren't handled
815 // directly.
816 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
817 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
818
819 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
820 setOperationAction(ISD::MULHS, VT, Legal);
821 setOperationAction(ISD::MULHU, VT, Legal);
822 } else {
823 setOperationAction(ISD::MULHS, VT, Expand);
824 setOperationAction(ISD::MULHU, VT, Expand);
825 }
826 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
827 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
828
829 setOperationAction(ISD::BSWAP, VT, Expand);
830 setOperationAction(ISD::CTTZ, VT, Expand);
831
832 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
833 setTruncStoreAction(VT, InnerVT, Expand);
834 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
835 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
836 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
837 }
838 }
839
840 // AArch64 has implementations of a lot of rounding-like FP operations.
841 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
842 setOperationAction(ISD::FFLOOR, Ty, Legal);
843 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
844 setOperationAction(ISD::FCEIL, Ty, Legal);
845 setOperationAction(ISD::FRINT, Ty, Legal);
846 setOperationAction(ISD::FTRUNC, Ty, Legal);
847 setOperationAction(ISD::FROUND, Ty, Legal);
848 }
849
850 if (Subtarget->hasFullFP16()) {
851 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
852 setOperationAction(ISD::FFLOOR, Ty, Legal);
853 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
854 setOperationAction(ISD::FCEIL, Ty, Legal);
855 setOperationAction(ISD::FRINT, Ty, Legal);
856 setOperationAction(ISD::FTRUNC, Ty, Legal);
857 setOperationAction(ISD::FROUND, Ty, Legal);
858 }
859 }
860
861 if (Subtarget->hasSVE())
862 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
863
864 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
865 }
866
867 if (Subtarget->hasSVE()) {
868 // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
869 // splat of 0 or undef) once vector selects supported in SVE codegen. See
870 // D68877 for more details.
871 for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
872 if (isTypeLegal(VT))
873 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
874 }
875 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
876 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
877
878 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
879 if (isTypeLegal(VT)) {
880 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
881 }
882 }
883 }
884
885 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
886}
887
888void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
889 assert(VT.isVector() && "VT should be a vector type");
890
891 if (VT.isFloatingPoint()) {
892 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
893 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
894 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
895 }
896
897 // Mark vector float intrinsics as expand.
898 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
899 setOperationAction(ISD::FSIN, VT, Expand);
900 setOperationAction(ISD::FCOS, VT, Expand);
901 setOperationAction(ISD::FPOW, VT, Expand);
902 setOperationAction(ISD::FLOG, VT, Expand);
903 setOperationAction(ISD::FLOG2, VT, Expand);
904 setOperationAction(ISD::FLOG10, VT, Expand);
905 setOperationAction(ISD::FEXP, VT, Expand);
906 setOperationAction(ISD::FEXP2, VT, Expand);
907
908 // But we do support custom-lowering for FCOPYSIGN.
909 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
910 }
911
912 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
913 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
914 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
915 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
916 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
917 setOperationAction(ISD::SRA, VT, Custom);
918 setOperationAction(ISD::SRL, VT, Custom);
919 setOperationAction(ISD::SHL, VT, Custom);
920 setOperationAction(ISD::OR, VT, Custom);
921 setOperationAction(ISD::SETCC, VT, Custom);
922 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
923
924 setOperationAction(ISD::SELECT, VT, Expand);
925 setOperationAction(ISD::SELECT_CC, VT, Expand);
926 setOperationAction(ISD::VSELECT, VT, Expand);
927 for (MVT InnerVT : MVT::all_valuetypes())
928 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
929
930 // CNT supports only B element sizes, then use UADDLP to widen.
931 if (VT != MVT::v8i8 && VT != MVT::v16i8)
932 setOperationAction(ISD::CTPOP, VT, Custom);
933
934 setOperationAction(ISD::UDIV, VT, Expand);
935 setOperationAction(ISD::SDIV, VT, Expand);
936 setOperationAction(ISD::UREM, VT, Expand);
937 setOperationAction(ISD::SREM, VT, Expand);
938 setOperationAction(ISD::FREM, VT, Expand);
939
940 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
941 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
942
943 if (!VT.isFloatingPoint())
944 setOperationAction(ISD::ABS, VT, Legal);
945
946 // [SU][MIN|MAX] are available for all NEON types apart from i64.
947 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
948 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
949 setOperationAction(Opcode, VT, Legal);
950
951 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
952 if (VT.isFloatingPoint() &&
953 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
954 for (unsigned Opcode :
955 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
956 setOperationAction(Opcode, VT, Legal);
957
958 if (Subtarget->isLittleEndian()) {
959 for (unsigned im = (unsigned)ISD::PRE_INC;
960 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
961 setIndexedLoadAction(im, VT, Legal);
962 setIndexedStoreAction(im, VT, Legal);
963 }
964 }
965}
966
967void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
968 addRegisterClass(VT, &AArch64::FPR64RegClass);
969 addTypeForNEON(VT, MVT::v2i32);
970}
971
972void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
973 addRegisterClass(VT, &AArch64::FPR128RegClass);
974 addTypeForNEON(VT, MVT::v4i32);
975}
976
977EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
978 EVT VT) const {
979 if (!VT.isVector())
980 return MVT::i32;
981 return VT.changeVectorElementTypeToInteger();
982}
983
984static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
985 const APInt &Demanded,
986 TargetLowering::TargetLoweringOpt &TLO,
987 unsigned NewOpc) {
988 uint64_t OldImm = Imm, NewImm, Enc;
989 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
990
991 // Return if the immediate is already all zeros, all ones, a bimm32 or a
992 // bimm64.
993 if (Imm == 0 || Imm == Mask ||
994 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
995 return false;
996
997 unsigned EltSize = Size;
998 uint64_t DemandedBits = Demanded.getZExtValue();
999
1000 // Clear bits that are not demanded.
1001 Imm &= DemandedBits;
1002
1003 while (true) {
1004 // The goal here is to set the non-demanded bits in a way that minimizes
1005 // the number of switching between 0 and 1. In order to achieve this goal,
1006 // we set the non-demanded bits to the value of the preceding demanded bits.
1007 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1008 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1009 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1010 // The final result is 0b11000011.
1011 uint64_t NonDemandedBits = ~DemandedBits;
1012 uint64_t InvertedImm = ~Imm & DemandedBits;
1013 uint64_t RotatedImm =
1014 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1015 NonDemandedBits;
1016 uint64_t Sum = RotatedImm + NonDemandedBits;
1017 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1018 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1019 NewImm = (Imm | Ones) & Mask;
1020
1021 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1022 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1023 // we halve the element size and continue the search.
1024 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1025 break;
1026
1027 // We cannot shrink the element size any further if it is 2-bits.
1028 if (EltSize == 2)
1029 return false;
1030
1031 EltSize /= 2;
1032 Mask >>= EltSize;
1033 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1034
1035 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
1036 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1037 return false;
1038
1039 // Merge the upper and lower halves of Imm and DemandedBits.
1040 Imm |= Hi;
1041 DemandedBits |= DemandedBitsHi;
1042 }
1043
1044 ++NumOptimizedImms;
1045
1046 // Replicate the element across the register width.
1047 while (EltSize < Size) {
1048 NewImm |= NewImm << EltSize;
1049 EltSize *= 2;
1050 }
1051
1052 (void)OldImm;
1053 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1054 "demanded bits should never be altered");
1055 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1056
1057 // Create the new constant immediate node.
1058 EVT VT = Op.getValueType();
1059 SDLoc DL(Op);
1060 SDValue New;
1061
1062 // If the new constant immediate is all-zeros or all-ones, let the target
1063 // independent DAG combine optimize this node.
1064 if (NewImm == 0 || NewImm == OrigMask) {
1065 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1066 TLO.DAG.getConstant(NewImm, DL, VT));
1067 // Otherwise, create a machine node so that target independent DAG combine
1068 // doesn't undo this optimization.
1069 } else {
1070 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1071 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1072 New = SDValue(
1073 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1074 }
1075
1076 return TLO.CombineTo(Op, New);
1077}
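
// Editor's note (not part of the LLVM source): the comment at source lines
// 1004-1010 above works the example 0bx10xx0x1 -> 0b11000011. This standalone
// sketch replays that single 8-bit element through the same rotate/add/carry
// steps, showing how each run of non-demanded bits gets filled with the value
// of the demanded bit just below it while the demanded bits stay untouched.
// Variable names mirror optimizeLogicalImm; the 8-bit element size and the
// 0xff masking are assumptions made to keep the example small.
#include <cassert>
#include <cstdint>

int main() {
  const unsigned EltSize = 8;
  uint64_t DemandedBits = 0b01100101;   // demanded: bits 0, 2, 5, 6
  uint64_t Imm          = 0b01000001;   // their values: bit6=1, bit5=0, bit2=0, bit0=1

  uint64_t NonDemandedBits = ~DemandedBits & 0xff;
  uint64_t InvertedImm = ~Imm & DemandedBits;
  uint64_t RotatedImm =
      ((InvertedImm << 1) | ((InvertedImm >> (EltSize - 1)) & 1)) &
      NonDemandedBits;
  uint64_t Sum = RotatedImm + NonDemandedBits;
  bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
  uint64_t Ones = (Sum + Carry) & NonDemandedBits;
  uint64_t NewImm = (Imm | Ones) & 0xff;

  assert(NewImm == 0b11000011);                  // matches the comment's result
  assert(((Imm ^ NewImm) & DemandedBits) == 0);  // demanded bits never altered
  return 0;
}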
1078
1079bool AArch64TargetLowering::targetShrinkDemandedConstant(
1080 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
1081 // Delay this optimization to as late as possible.
1082 if (!TLO.LegalOps)
1083 return false;
1084
1085 if (!EnableOptimizeLogicalImm)
1086 return false;
1087
1088 EVT VT = Op.getValueType();
1089 if (VT.isVector())
1090 return false;
1091
1092 unsigned Size = VT.getSizeInBits();
1093 assert((Size == 32 || Size == 64) &&
1094 "i32 or i64 is expected after legalization.");
1095
1096 // Exit early if we demand all bits.
1097 if (Demanded.countPopulation() == Size)
1098 return false;
1099
1100 unsigned NewOpc;
1101 switch (Op.getOpcode()) {
1102 default:
1103 return false;
1104 case ISD::AND:
1105 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1106 break;
1107 case ISD::OR:
1108 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1109 break;
1110 case ISD::XOR:
1111 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1112 break;
1113 }
1114 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1115 if (!C)
1116 return false;
1117 uint64_t Imm = C->getZExtValue();
1118 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
1119}
1120
1121/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1122/// Mask are known to be either zero or one and return them Known.
1123void AArch64TargetLowering::computeKnownBitsForTargetNode(
1124 const SDValue Op, KnownBits &Known,
1125 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1126 switch (Op.getOpcode()) {
1127 default:
1128 break;
1129 case AArch64ISD::CSEL: {
1130 KnownBits Known2;
1131 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1132 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1133 Known.Zero &= Known2.Zero;
1134 Known.One &= Known2.One;
1135 break;
1136 }
1137 case AArch64ISD::LOADgot:
1138 case AArch64ISD::ADDlow: {
1139 if (!Subtarget->isTargetILP32())
1140 break;
1141 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1142 Known.Zero = APInt::getHighBitsSet(64, 32);
1143 break;
1144 }
1145 case ISD::INTRINSIC_W_CHAIN: {
1146 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1147 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1148 switch (IntID) {
1149 default: return;
1150 case Intrinsic::aarch64_ldaxr:
1151 case Intrinsic::aarch64_ldxr: {
1152 unsigned BitWidth = Known.getBitWidth();
1153 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1154 unsigned MemBits = VT.getScalarSizeInBits();
1155 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1156 return;
1157 }
1158 }
1159 break;
1160 }
1161 case ISD::INTRINSIC_WO_CHAIN:
1162 case ISD::INTRINSIC_VOID: {
1163 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1164 switch (IntNo) {
1165 default:
1166 break;
1167 case Intrinsic::aarch64_neon_umaxv:
1168 case Intrinsic::aarch64_neon_uminv: {
1169 // Figure out the datatype of the vector operand. The UMINV instruction
1170 // will zero extend the result, so we can mark as known zero all the
1171 // bits larger than the element datatype. 32-bit or larger doesn't need
1172 // this as those are legal types and will be handled by isel directly.
1173 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1174 unsigned BitWidth = Known.getBitWidth();
1175 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1176 assert(BitWidth >= 8 && "Unexpected width!");
1177 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1178 Known.Zero |= Mask;
1179 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1180 assert(BitWidth >= 16 && "Unexpected width!");
1181 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1182 Known.Zero |= Mask;
1183 }
1184 break;
1185 } break;
1186 }
1187 }
1188 }
1189}
1190
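The ldaxr/ldxr case above marks every bit above the loaded memory width as known zero, because those loads zero-extend into the full register. The standalone sketch below (not part of AArch64ISelLowering.cpp; the helper name is made up for illustration) models that mask computation with plain integers instead of llvm::KnownBits.

#include <cassert>
#include <cstdint>

// Known-zero mask for a zero-extending load: the high (BitWidth - MemBits)
// bits of the result are guaranteed to be zero.
static uint64_t knownZeroMaskForZExtLoad(unsigned BitWidth, unsigned MemBits) {
  assert(MemBits <= BitWidth && BitWidth <= 64 && "widths out of range");
  if (MemBits == BitWidth)
    return 0; // nothing above the loaded bits
  uint64_t RegMask = (BitWidth == 64) ? ~0ULL : ((1ULL << BitWidth) - 1);
  return RegMask & ~((1ULL << MemBits) - 1);
}

// Example: an i64 value produced by ldxrb (8-bit memory type) has its top 56
// bits known zero: knownZeroMaskForZExtLoad(64, 8) == 0xFFFFFFFFFFFFFF00.
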
1191MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1192 EVT) const {
1193 return MVT::i64;
1194}
1195
1196bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1197 EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1198 bool *Fast) const {
1199 if (Subtarget->requiresStrictAlign())
1200 return false;
1201
1202 if (Fast) {
1203 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1204 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1205 // See comments in performSTORECombine() for more details about
1206 // these conditions.
1207
1208 // Code that uses clang vector extensions can mark that it
1209 // wants unaligned accesses to be treated as fast by
1210 // underspecifying alignment to be 1 or 2.
1211 Align <= 2 ||
1212
1213 // Disregard v2i64. Memcpy lowering produces those and splitting
1214 // them regresses performance on micro-benchmarks and olden/bh.
1215 VT == MVT::v2i64;
1216 }
1217 return true;
1218}
1219
1220// Same as above but handling LLTs instead.
1221bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1222 LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1223 bool *Fast) const {
1224 if (Subtarget->requiresStrictAlign())
1225 return false;
1226
1227 if (Fast) {
1228 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1229 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1230 Ty.getSizeInBytes() != 16 ||
1231 // See comments in performSTORECombine() for more details about
1232 // these conditions.
1233
1234 // Code that uses clang vector extensions can mark that it
1235 // wants unaligned accesses to be treated as fast by
1236 // underspecifying alignment to be 1 or 2.
1237 Align <= 2 ||
1238
1239 // Disregard v2i64. Memcpy lowering produces those and splitting
1240 // them regresses performance on micro-benchmarks and olden/bh.
1241 Ty == LLT::vector(2, 64);
1242 }
1243 return true;
1244}
1245
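Both overloads above report an access with alignment 1 or 2 as fast so that code using clang's vector extensions can opt into unaligned loads and stores. A minimal sketch of that idiom follows; the typedef and function are illustrative only (not from this file), and the actual codegen depends on the subtarget.

#include <cstdint>

// A 16-byte vector type whose declared alignment is 1, signalling that
// accesses through it may be arbitrarily unaligned.
typedef int32_t vec4i32 __attribute__((vector_size(16), aligned(1)));

int32_t sumUnaligned(const vec4i32 *p) {
  vec4i32 v = *p; // unaligned 128-bit load, still treated as "fast" here
  return v[0] + v[1] + v[2] + v[3];
}
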
1246FastISel *
1247AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1248 const TargetLibraryInfo *libInfo) const {
1249 return AArch64::createFastISel(funcInfo, libInfo);
1250}
1251
1252const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1253 switch ((AArch64ISD::NodeType)Opcode) {
1254 case AArch64ISD::FIRST_NUMBER: break;
1255 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1256 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1257 case AArch64ISD::ADR: return "AArch64ISD::ADR";
1258 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1259 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1260 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1261 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1262 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1263 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1264 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1265 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1266 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1267 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1268 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1269 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1270 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1271 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1272 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1273 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1274 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1275 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1276 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1277 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1278 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1279 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1280 case AArch64ISD::STRICT_FCMP: return "AArch64ISD::STRICT_FCMP";
1281 case AArch64ISD::STRICT_FCMPE: return "AArch64ISD::STRICT_FCMPE";
1282 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1283 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1284 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1285 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1286 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1287 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1288 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1289 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1290 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1291 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1292 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1293 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1294 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1295 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1296 case AArch64ISD::BSP: return "AArch64ISD::BSP";
1297 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1298 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1299 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1300 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1301 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1302 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1303 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1304 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1305 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1306 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1307 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1308 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1309 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1310 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1311 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1312 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1313 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1314 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1315 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1316 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1317 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1318 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1319 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1320 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1321 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1322 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1323 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1324 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1325 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1326 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1327 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1328 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1329 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1330 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1331 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1332 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1333 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1334 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1335 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1336 case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED";
1337 case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED";
1338 case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED";
1339 case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED";
1340 case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED";
1341 case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED";
1342 case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
1343 case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
1344 case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
1345 case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
1346 case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
1347 case AArch64ISD::REV: return "AArch64ISD::REV";
1348 case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";
1349 case AArch64ISD::TBL: return "AArch64ISD::TBL";
1350 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1351 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1352 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1353 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1354 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1355 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1356 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1357 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1358 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1359 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1360 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1361 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1362 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1363 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1364 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1365 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1366 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1367 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1368 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1369 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1370 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1371 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1372 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1373 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1374 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1375 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1376 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1377 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1378 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1379 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1380 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1381 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1382 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1383 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1384 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1385 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1386 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1387 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1388 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1389 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1390 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1391 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1392 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1393 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1394 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1395 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1396 case AArch64ISD::STG: return "AArch64ISD::STG";
1397 case AArch64ISD::STZG: return "AArch64ISD::STZG";
1398 case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
1399 case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
1400 case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI";
1401 case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
1402 case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
1403 case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
1404 case AArch64ISD::INSR: return "AArch64ISD::INSR";
1405 case AArch64ISD::PTEST: return "AArch64ISD::PTEST";
1406 case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
1407 case AArch64ISD::LDNF1: return "AArch64ISD::LDNF1";
1408 case AArch64ISD::LDNF1S: return "AArch64ISD::LDNF1S";
1409 case AArch64ISD::LDFF1: return "AArch64ISD::LDFF1";
1410 case AArch64ISD::LDFF1S: return "AArch64ISD::LDFF1S";
1411 case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
1412 case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
1413 case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW";
1414 case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW";
1415 case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED";
1416 case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED";
1417 case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
1418 case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
1419 case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
1420 case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
1421 case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
1422 case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
1423 case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
1424 case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
1425 case AArch64ISD::GLDFF1: return "AArch64ISD::GLDFF1";
1426 case AArch64ISD::GLDFF1_SCALED: return "AArch64ISD::GLDFF1_SCALED";
1427 case AArch64ISD::GLDFF1_SXTW: return "AArch64ISD::GLDFF1_SXTW";
1428 case AArch64ISD::GLDFF1_UXTW: return "AArch64ISD::GLDFF1_UXTW";
1429 case AArch64ISD::GLDFF1_SXTW_SCALED:return "AArch64ISD::GLDFF1_SXTW_SCALED";
1430 case AArch64ISD::GLDFF1_UXTW_SCALED:return "AArch64ISD::GLDFF1_UXTW_SCALED";
1431 case AArch64ISD::GLDFF1_IMM: return "AArch64ISD::GLDFF1_IMM";
1432 case AArch64ISD::GLDFF1S: return "AArch64ISD::GLDFF1S";
1433 case AArch64ISD::GLDFF1S_SCALED: return "AArch64ISD::GLDFF1S_SCALED";
1434 case AArch64ISD::GLDFF1S_SXTW: return "AArch64ISD::GLDFF1S_SXTW";
1435 case AArch64ISD::GLDFF1S_UXTW: return "AArch64ISD::GLDFF1S_UXTW";
1436 case AArch64ISD::GLDFF1S_SXTW_SCALED:
1437 return "AArch64ISD::GLDFF1S_SXTW_SCALED";
1438 case AArch64ISD::GLDFF1S_UXTW_SCALED:
1439 return "AArch64ISD::GLDFF1S_UXTW_SCALED";
1440 case AArch64ISD::GLDFF1S_IMM: return "AArch64ISD::GLDFF1S_IMM";
1441
1442 case AArch64ISD::GLDNT1: return "AArch64ISD::GLDNT1";
1443 case AArch64ISD::GLDNT1S: return "AArch64ISD::GLDNT1S";
1444
1445 case AArch64ISD::SST1: return "AArch64ISD::SST1";
1446 case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
1447 case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
1448 case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
1449 case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
1450 case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
1451 case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
1452
1453 case AArch64ISD::SSTNT1: return "AArch64ISD::SSTNT1";
1454
1455 case AArch64ISD::LDP: return "AArch64ISD::LDP";
1456 case AArch64ISD::STP: return "AArch64ISD::STP";
1457 case AArch64ISD::STNP: return "AArch64ISD::STNP";
1458 case AArch64ISD::DUP_PRED: return "AArch64ISD::DUP_PRED";
1459 case AArch64ISD::INDEX_VECTOR: return "AArch64ISD::INDEX_VECTOR";
1460 }
1461 return nullptr;
1462}
1463
1464MachineBasicBlock *
1465AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1466 MachineBasicBlock *MBB) const {
1467 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1468 // phi node:
1469
1470 // OrigBB:
1471 // [... previous instrs leading to comparison ...]
1472 // b.ne TrueBB
1473 // b EndBB
1474 // TrueBB:
1475 // ; Fallthrough
1476 // EndBB:
1477 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1478
1479 MachineFunction *MF = MBB->getParent();
1480 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1481 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1482 DebugLoc DL = MI.getDebugLoc();
1483 MachineFunction::iterator It = ++MBB->getIterator();
1484
1485 Register DestReg = MI.getOperand(0).getReg();
1486 Register IfTrueReg = MI.getOperand(1).getReg();
1487 Register IfFalseReg = MI.getOperand(2).getReg();
1488 unsigned CondCode = MI.getOperand(3).getImm();
1489 bool NZCVKilled = MI.getOperand(4).isKill();
1490
1491 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1492 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1493 MF->insert(It, TrueBB);
1494 MF->insert(It, EndBB);
1495
1496 // Transfer rest of current basic-block to EndBB
1497 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1498 MBB->end());
1499 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1500
1501 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1502 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1503 MBB->addSuccessor(TrueBB);
1504 MBB->addSuccessor(EndBB);
1505
1506 // TrueBB falls through to the end.
1507 TrueBB->addSuccessor(EndBB);
1508
1509 if (!NZCVKilled) {
1510 TrueBB->addLiveIn(AArch64::NZCV);
1511 EndBB->addLiveIn(AArch64::NZCV);
1512 }
1513
1514 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1515 .addReg(IfTrueReg)
1516 .addMBB(TrueBB)
1517 .addReg(IfFalseReg)
1518 .addMBB(MBB);
1519
1520 MI.eraseFromParent();
1521 return EndBB;
1522}
1523
1524MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
1525 MachineInstr &MI, MachineBasicBlock *BB) const {
1526 assert(!isAsynchronousEHPersonality(classifyEHPersonality(
1527 BB->getParent()->getFunction().getPersonalityFn())) &&
1528 "SEH does not use catchret!");
1529 return BB;
1530}
1531
1532MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1533 MachineInstr &MI, MachineBasicBlock *BB) const {
1534 switch (MI.getOpcode()) {
1535 default:
1536#ifndef NDEBUG
1537 MI.dump();
1538#endif
1539 llvm_unreachable("Unexpected instruction for custom inserter!");
1540
1541 case AArch64::F128CSEL:
1542 return EmitF128CSEL(MI, BB);
1543
1544 case TargetOpcode::STACKMAP:
1545 case TargetOpcode::PATCHPOINT:
1546 return emitPatchPoint(MI, BB);
1547
1548 case AArch64::CATCHRET:
1549 return EmitLoweredCatchRet(MI, BB);
1550 }
1551}
1552
1553//===----------------------------------------------------------------------===//
1554// AArch64 Lowering private implementation.
1555//===----------------------------------------------------------------------===//
1556
1557//===----------------------------------------------------------------------===//
1558// Lowering Code
1559//===----------------------------------------------------------------------===//
1560
1561/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1562/// CC
1563static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1564 switch (CC) {
1565 default:
1566 llvm_unreachable("Unknown condition code!");
1567 case ISD::SETNE:
1568 return AArch64CC::NE;
1569 case ISD::SETEQ:
1570 return AArch64CC::EQ;
1571 case ISD::SETGT:
1572 return AArch64CC::GT;
1573 case ISD::SETGE:
1574 return AArch64CC::GE;
1575 case ISD::SETLT:
1576 return AArch64CC::LT;
1577 case ISD::SETLE:
1578 return AArch64CC::LE;
1579 case ISD::SETUGT:
1580 return AArch64CC::HI;
1581 case ISD::SETUGE:
1582 return AArch64CC::HS;
1583 case ISD::SETULT:
1584 return AArch64CC::LO;
1585 case ISD::SETULE:
1586 return AArch64CC::LS;
1587 }
1588}
1589
1590/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1591static void changeFPCCToAArch64CC(ISD::CondCode CC,
1592 AArch64CC::CondCode &CondCode,
1593 AArch64CC::CondCode &CondCode2) {
1594 CondCode2 = AArch64CC::AL;
1595 switch (CC) {
1596 default:
1597 llvm_unreachable("Unknown FP condition!");
1598 case ISD::SETEQ:
1599 case ISD::SETOEQ:
1600 CondCode = AArch64CC::EQ;
1601 break;
1602 case ISD::SETGT:
1603 case ISD::SETOGT:
1604 CondCode = AArch64CC::GT;
1605 break;
1606 case ISD::SETGE:
1607 case ISD::SETOGE:
1608 CondCode = AArch64CC::GE;
1609 break;
1610 case ISD::SETOLT:
1611 CondCode = AArch64CC::MI;
1612 break;
1613 case ISD::SETOLE:
1614 CondCode = AArch64CC::LS;
1615 break;
1616 case ISD::SETONE:
1617 CondCode = AArch64CC::MI;
1618 CondCode2 = AArch64CC::GT;
1619 break;
1620 case ISD::SETO:
1621 CondCode = AArch64CC::VC;
1622 break;
1623 case ISD::SETUO:
1624 CondCode = AArch64CC::VS;
1625 break;
1626 case ISD::SETUEQ:
1627 CondCode = AArch64CC::EQ;
1628 CondCode2 = AArch64CC::VS;
1629 break;
1630 case ISD::SETUGT:
1631 CondCode = AArch64CC::HI;
1632 break;
1633 case ISD::SETUGE:
1634 CondCode = AArch64CC::PL;
1635 break;
1636 case ISD::SETLT:
1637 case ISD::SETULT:
1638 CondCode = AArch64CC::LT;
1639 break;
1640 case ISD::SETLE:
1641 case ISD::SETULE:
1642 CondCode = AArch64CC::LE;
1643 break;
1644 case ISD::SETNE:
1645 case ISD::SETUNE:
1646 CondCode = AArch64CC::NE;
1647 break;
1648 }
1649}
1650
1651/// Convert a DAG fp condition code to an AArch64 CC.
1652/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1653/// should be AND'ed instead of OR'ed.
1654static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1655 AArch64CC::CondCode &CondCode,
1656 AArch64CC::CondCode &CondCode2) {
1657 CondCode2 = AArch64CC::AL;
1658 switch (CC) {
1659 default:
1660 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1661 assert(CondCode2 == AArch64CC::AL);
1662 break;
1663 case ISD::SETONE:
1664 // (a one b)
1665 // == ((a olt b) || (a ogt b))
1666 // == ((a ord b) && (a une b))
1667 CondCode = AArch64CC::VC;
1668 CondCode2 = AArch64CC::NE;
1669 break;
1670 case ISD::SETUEQ:
1671 // (a ueq b)
1672 // == ((a uno b) || (a oeq b))
1673 // == ((a ule b) && (a uge b))
1674 CondCode = AArch64CC::PL;
1675 CondCode2 = AArch64CC::LE;
1676 break;
1677 }
1678}
1679
1680/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1681/// CC usable with the vector instructions. Fewer operations are available
1682/// without a real NZCV register, so we have to use less efficient combinations
1683/// to get the same effect.
1684static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1685 AArch64CC::CondCode &CondCode,
1686 AArch64CC::CondCode &CondCode2,
1687 bool &Invert) {
1688 Invert = false;
1689 switch (CC) {
1690 default:
1691 // Mostly the scalar mappings work fine.
1692 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1693 break;
1694 case ISD::SETUO:
1695 Invert = true;
1696 LLVM_FALLTHROUGH;
1697 case ISD::SETO:
1698 CondCode = AArch64CC::MI;
1699 CondCode2 = AArch64CC::GE;
1700 break;
1701 case ISD::SETUEQ:
1702 case ISD::SETULT:
1703 case ISD::SETULE:
1704 case ISD::SETUGT:
1705 case ISD::SETUGE:
1706 // All of the compare-mask comparisons are ordered, but we can switch
1707 // between the two by a double inversion. E.g. ULE == !OGT.
1708 Invert = true;
1709 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
1710 CondCode, CondCode2);
1711 break;
1712 }
1713}
1714
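The double-inversion rule used above (for example ULE == !OGT) can be checked at the source level: vector compares only provide ordered predicates, so an unordered predicate is obtained by inverting its ordered complement. A small sketch, not taken from this file:

#include <cmath>

// NOT(OGT) == ULE: "a > b" is an ordered compare and is false whenever either
// operand is NaN, so its negation is true for NaN inputs, which is exactly the
// unordered-or-less-or-equal semantics.
static bool fcmpULE(float a, float b) { return !(a > b); }

// fcmpULE(1.0f, NAN) == true, fcmpULE(2.0f, 1.0f) == false.
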
1715static bool isLegalArithImmed(uint64_t C) {
1716 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1717 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1718 LLVM_DEBUG(dbgs() << "Is imm " << C
1719 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1720 return IsLegal;
1721}
1722
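For reference, the legality test above accepts any 12-bit value, or a 12-bit value shifted left by 12, matching the add/sub immediate encoding. A standalone restatement with a few spot checks (illustrative; not part of this file):

#include <cstdint>

static bool isLegalArithImmedSketch(uint64_t C) {
  return (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
}

// Spot checks:
//   isLegalArithImmedSketch(4095)       == true   (fits in 12 bits)
//   isLegalArithImmedSketch(0x1000)     == true   (0x1, LSL #12)
//   isLegalArithImmedSketch(0x1001)     == false  (needs both halves)
//   isLegalArithImmedSketch(1ULL << 24) == false  (shifted value too wide)
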
1723// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
1724// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
1725// can be set differently by this operation. It comes down to whether
1726// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1727// everything is fine. If not then the optimization is wrong. Thus general
1728// comparisons are only valid if op2 != 0.
1729//
1730// So, finally, the only LLVM-native comparisons that don't mention C and V
1731// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1732// the absence of information about op2.
1733static bool isCMN(SDValue Op, ISD::CondCode CC) {
1734 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1735 (CC == ISD::SETEQ || CC == ISD::SETNE);
1736}
1737
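The restriction above can be made concrete: SUBS and ADDS agree on the Z flag (which is all SETEQ/SETNE consult) but can disagree on C and V, most obviously when op2 is zero. A small sketch of the carry computation (hypothetical helpers, not LLVM APIs):

#include <cstdint>
#include <cstdio>

// AArch64 carry semantics: for SUBS, C means "no borrow"; for ADDS, C means
// "unsigned wraparound occurred".
static bool carryOfSubs(uint64_t a, uint64_t b) { return a >= b; }
static bool carryOfAdds(uint64_t a, uint64_t b) { return a + b < a; }

int main() {
  uint64_t a = 5, b = 0;
  // cmp a, b        -> C = 1 (5 - 0 does not borrow)
  // cmn a, (0 - b)  -> C = 0 (5 + 0 does not wrap)
  std::printf("subs C=%d  adds C=%d\n", carryOfSubs(a, b), carryOfAdds(a, 0 - b));
  // Unsigned predicates (HS/LO/HI/LS) read C, so they cannot use CMN here;
  // EQ/NE only read Z, which both forms compute identically.
  return 0;
}
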
1738static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
1739 SelectionDAG &DAG, SDValue Chain,
1740 bool IsSignaling) {
1741 EVT VT = LHS.getValueType();
1742 assert(VT != MVT::f128);
1743 assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
1744 unsigned Opcode =
1745 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
1746 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
1747}
1748
1749static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1750 const SDLoc &dl, SelectionDAG &DAG) {
1751 EVT VT = LHS.getValueType();
1752 const bool FullFP16 =
1753 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1754
1755 if (VT.isFloatingPoint()) {
1756 assert(VT != MVT::f128);
1757 if (VT == MVT::f16 && !FullFP16) {
1758 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1759 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1760 VT = MVT::f32;
1761 }
1762 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1763 }
1764
1765 // The CMP instruction is just an alias for SUBS, and representing it as
1766 // SUBS means that it's possible to get CSE with subtract operations.
1767 // A later phase can perform the optimization of setting the destination
1768 // register to WZR/XZR if it ends up being unused.
1769 unsigned Opcode = AArch64ISD::SUBS;
1770
1771 if (isCMN(RHS, CC)) {
1772 // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
1773 Opcode = AArch64ISD::ADDS;
1774 RHS = RHS.getOperand(1);
1775 } else if (isCMN(LHS, CC)) {
1776 // As we are looking for EQ/NE compares, the operands can be commuted; can
1777 // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
1778 Opcode = AArch64ISD::ADDS;
1779 LHS = LHS.getOperand(1);
1780 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
1781 if (LHS.getOpcode() == ISD::AND) {
1782 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1783 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1784 // of the signed comparisons.
1785 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
1786 DAG.getVTList(VT, MVT_CC),
1787 LHS.getOperand(0),
1788 LHS.getOperand(1));
1789 // Replace all users of (and X, Y) with newly generated (ands X, Y)
1790 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
1791 return ANDSNode.getValue(1);
1792 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
1793 // Use result of ANDS
1794 return LHS.getValue(1);
1795 }
1796 }
1797
1798 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1799 .getValue(1);
1800}
1801
1802/// \defgroup AArch64CCMP CMP;CCMP matching
1803///
1804/// These functions deal with the formation of CMP;CCMP;... sequences.
1805/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1806/// a comparison. They set the NZCV flags to a predefined value if their
1807/// predicate is false. This allows us to express arbitrary conjunctions, for
1808/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1809/// can be expressed as:
1810/// cmp A
1811/// ccmp B, inv(CB), CA
1812/// check for CB flags
1813///
1814/// This naturally lets us implement chains of AND operations with SETCC
1815/// operands. And we can even implement some other situations by transforming
1816/// them:
1817/// - We can implement (NEG SETCC) i.e. negating a single comparison by
1818/// negating the flags used in a CCMP/FCCMP operation.
1819/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1820/// by negating the flags we test for afterwards. i.e.
1821/// NEG (CMP CCMP CCCMP ...) can be implemented.
1822/// - Note that we can only ever negate all previously processed results.
1823/// What we can not implement by flipping the flags to test is a negation
1824/// of two sub-trees (because the negation affects all sub-trees emitted so
1825/// far, so the 2nd sub-tree we emit would also affect the first).
1826/// With those tools we can implement some OR operations:
1827/// - (OR (SETCC A) (SETCC B)) can be implemented via:
1828/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1829/// - After transforming OR to NEG/AND combinations we may be able to use NEG
1830/// elimination rules from earlier to implement the whole thing as a
1831/// CCMP/FCCMP chain.
1832///
1833/// As a complete example:
1834/// or (or (setCA (cmp A)) (setCB (cmp B)))
1835/// (and (setCC (cmp C)) (setCD (cmp D)))
1836/// can be reassociated to:
1837/// or (and (setCC (cmp C)) (setCD (cmp D)))
1838/// (or (setCA (cmp A)) (setCB (cmp B)))
1839/// can be transformed to:
1840/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1841/// (and (not (setCA (cmp A))) (not (setCB (cmp B)))))
1842/// which can be implemented as:
1843/// cmp C
1844/// ccmp D, inv(CD), CC
1845/// ccmp A, CA, inv(CD)
1846/// ccmp B, CB, inv(CA)
1847/// check for CB flags
1848///
1849/// A counterexample is "or (and A B) (and C D)" which translates to
1850/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
1851/// can only implement 1 of the inner (not) operations, but not both!
1852/// @{
1853
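As a concrete instance of the matching described above, a short AND of two integer SETCCs is shown below together with a plausible cmp/ccmp lowering. The C++ function is illustrative only; the assembly in the trailing comment is schematic and the register assignments are assumptions.

#include <cstdint>

// Two SETCCs joined by a logical AND: exactly the shape the CCMP matcher
// turns into one cmp followed by one conditional compare.
bool inRange(int32_t x, int32_t lo, int32_t hi) {
  return x >= lo && x <= hi;
}

// Plausible AArch64 lowering (schematic):
//   cmp   w0, w1          ; flags for x >= lo
//   ccmp  w0, w2, #0, ge  ; only compared if GE held, else NZCV := 0b0000
//   cset  w0, le          ; final result tests the second comparison's flags
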
1854/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1855static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1856 ISD::CondCode CC, SDValue CCOp,
1857 AArch64CC::CondCode Predicate,
1858 AArch64CC::CondCode OutCC,
1859 const SDLoc &DL, SelectionDAG &DAG) {
1860 unsigned Opcode = 0;
1861 const bool FullFP16 =
1862 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1863
1864 if (LHS.getValueType().isFloatingPoint()) {
1865 assert(LHS.getValueType() != MVT::f128);
1866 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1867 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1868 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1869 }
1870 Opcode = AArch64ISD::FCCMP;
1871 } else if (RHS.getOpcode() == ISD::SUB) {
1872 SDValue SubOp0 = RHS.getOperand(0);
1873 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1874 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1875 Opcode = AArch64ISD::CCMN;
1876 RHS = RHS.getOperand(1);
1877 }
1878 }
1879 if (Opcode == 0)
1880 Opcode = AArch64ISD::CCMP;
1881
1882 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1883 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1884 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1885 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1886 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1887}
1888
1889/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
1890/// expressed as a conjunction. See \ref AArch64CCMP.
1891/// \param CanNegate Set to true if we can negate the whole sub-tree just by
1892/// changing the conditions on the SETCC tests.
1893/// (this means we can call emitConjunctionRec() with
1894/// Negate==true on this sub-tree)
1895/// \param MustBeFirst Set to true if this subtree needs to be negated and we
1896/// cannot do the negation naturally. We are required to
1897/// emit the subtree first in this case.
1898/// \param WillNegate Is true if we are called when the result of this
1899/// subexpression must be negated. This happens when the
1900/// outer expression is an OR. We can use this fact to know
1901/// that we have a double negation (or (or ...) ...) that
1902/// can be implemented for free.
1903static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1904 bool &MustBeFirst, bool WillNegate,
1905 unsigned Depth = 0) {
1906 if (!Val.hasOneUse())
1907 return false;
1908 unsigned Opcode = Val->getOpcode();
1909 if (Opcode == ISD::SETCC) {
1910 if (Val->getOperand(0).getValueType() == MVT::f128)
1911 return false;
1912 CanNegate = true;
1913 MustBeFirst = false;
1914 return true;
1915 }
1916 // Protect against exponential runtime and stack overflow.
1917 if (Depth > 6)
1918 return false;
1919 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1920 bool IsOR = Opcode == ISD::OR;
1921 SDValue O0 = Val->getOperand(0);
1922 SDValue O1 = Val->getOperand(1);
1923 bool CanNegateL;
1924 bool MustBeFirstL;
1925 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
1926 return false;
1927 bool CanNegateR;
1928 bool MustBeFirstR;
1929 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
1930 return false;
1931
1932 if (MustBeFirstL && MustBeFirstR)
1933 return false;
1934
1935 if (IsOR) {
1936 // For an OR expression we need to be able to naturally negate at least
1937 // one side or we cannot do the transformation at all.
1938 if (!CanNegateL && !CanNegateR)
1939 return false;
1940 // If the result of the OR will be negated and we can naturally negate
1941 // the leaves, then this sub-tree as a whole negates naturally.
1942 CanNegate = WillNegate && CanNegateL && CanNegateR;
1943 // If we cannot naturally negate the whole sub-tree, then this must be
1944 // emitted first.
1945 MustBeFirst = !CanNegate;
1946 } else {
1947 assert(Opcode == ISD::AND && "Must be OR or AND");
1948 // We cannot naturally negate an AND operation.
1949 CanNegate = false;
1950 MustBeFirst = MustBeFirstL || MustBeFirstR;
1951 }
1952 return true;
1953 }
1954 return false;
1955}
1956
1957/// Emit a conjunction or disjunction tree with the CMP/FCMP followed by a chain
1958/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1959/// Tries to transform the given i1 producing node @p Val to a series of compare
1960/// and conditional compare operations. @returns an NZCV flags producing node
1961/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1962/// the transformation was not possible.
1963/// \p Negate is true if we want this sub-tree being negated just by changing
1964/// SETCC conditions.
1965static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
1966 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1967 AArch64CC::CondCode Predicate) {
1968 // We're at a tree leaf, produce a conditional comparison operation.
1969 unsigned Opcode = Val->getOpcode();
1970 if (Opcode == ISD::SETCC) {
1971 SDValue LHS = Val->getOperand(0);
1972 SDValue RHS = Val->getOperand(1);
1973 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1974 bool isInteger = LHS.getValueType().isInteger();
1975 if (Negate)
1976 CC = getSetCCInverse(CC, LHS.getValueType());
1977 SDLoc DL(Val);
1978 // Determine OutCC and handle FP special case.
1979 if (isInteger) {
1980 OutCC = changeIntCCToAArch64CC(CC);
1981 } else {
1982 assert(LHS.getValueType().isFloatingPoint());
1983 AArch64CC::CondCode ExtraCC;
1984 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1985 // Some floating point conditions can't be tested with a single condition
1986 // code. Construct an additional comparison in this case.
1987 if (ExtraCC != AArch64CC::AL) {
1988 SDValue ExtraCmp;
1989 if (!CCOp.getNode())
1990 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1991 else
1992 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1993 ExtraCC, DL, DAG);
1994 CCOp = ExtraCmp;
1995 Predicate = ExtraCC;
1996 }
1997 }
1998
1999 // Produce a normal comparison if we are first in the chain
2000 if (!CCOp)
2001 return emitComparison(LHS, RHS, CC, DL, DAG);
2002 // Otherwise produce a ccmp.
2003 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2004 DAG);
2005 }
2006 assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2007
2008 bool IsOR = Opcode == ISD::OR;
2009
2010 SDValue LHS = Val->getOperand(0);
2011 bool CanNegateL;
2012 bool MustBeFirstL;
2013 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2014 assert(ValidL && "Valid conjunction/disjunction tree");
2015 (void)ValidL;
2016
2017 SDValue RHS = Val->getOperand(1);
2018 bool CanNegateR;
2019 bool MustBeFirstR;
2020 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2021 assert(ValidR && "Valid conjunction/disjunction tree");
2022 (void)ValidR;
2023
2024 // Swap sub-tree that must come first to the right side.
2025 if (MustBeFirstL) {
2026 assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2027 std::swap(LHS, RHS);
2028 std::swap(CanNegateL, CanNegateR);
2029 std::swap(MustBeFirstL, MustBeFirstR);
2030 }
2031
2032 bool NegateR;
2033 bool NegateAfterR;
2034 bool NegateL;
2035 bool NegateAfterAll;
2036 if (Opcode == ISD::OR) {
2037 // Swap the sub-tree that we can negate naturally to the left.
2038 if (!CanNegateL) {
2039 assert(CanNegateR && "at least one side must be negatable");
2040 assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2041 assert(!Negate);
2042 std::swap(LHS, RHS);
2043 NegateR = false;
2044 NegateAfterR = true;
2045 } else {
2046 // Negate the left sub-tree if possible, otherwise negate the result.
2047 NegateR = CanNegateR;
2048 NegateAfterR = !CanNegateR;
2049 }
2050 NegateL = true;
2051 NegateAfterAll = !Negate;
2052 } else {
2053 assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2054 assert(!Negate && "Valid conjunction/disjunction tree");
2055
2056 NegateL = false;
2057 NegateR = false;
2058 NegateAfterR = false;
2059 NegateAfterAll = false;
2060 }
2061
2062 // Emit sub-trees.
2063 AArch64CC::CondCode RHSCC;
2064 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2065 if (NegateAfterR)
2066 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2067 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2068 if (NegateAfterAll)
2069 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2070 return CmpL;
2071}
2072
2073/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
2074/// In some cases this is even possible with OR operations in the expression.
2075/// See \ref AArch64CCMP.
2076/// \see emitConjunctionRec().
2077static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2078 AArch64CC::CondCode &OutCC) {
2079 bool DummyCanNegate;
2080 bool DummyMustBeFirst;
2081 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2082 return SDValue();
2083
2084 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2085}
2086
2087/// @}
2088
2089/// Returns how profitable it is to fold a comparison's operand's shift and/or
2090/// extension operations.
2091static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2092 auto isSupportedExtend = [&](SDValue V) {
2093 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2094 return true;
2095
2096 if (V.getOpcode() == ISD::AND)
2097 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2098 uint64_t Mask = MaskCst->getZExtValue();
2099 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2100 }
2101
2102 return false;
2103 };
2104
2105 if (!Op.hasOneUse())
2106 return 0;
2107
2108 if (isSupportedExtend(Op))
2109 return 1;
2110
2111 unsigned Opc = Op.getOpcode();
2112 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2113 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2114 uint64_t Shift = ShiftCst->getZExtValue();
2115 if (isSupportedExtend(Op.getOperand(0)))
2116 return (Shift <= 4) ? 2 : 1;
2117 EVT VT = Op.getValueType();
2118 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2119 return 1;
2120 }
2121
2122 return 0;
2123}
2124
2125static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2126 SDValue &AArch64cc, SelectionDAG &DAG,
2127 const SDLoc &dl) {
2128 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2129 EVT VT = RHS.getValueType();
2130 uint64_t C = RHSC->getZExtValue();
2131 if (!isLegalArithImmed(C)) {
2132 // Constant does not fit, try adjusting it by one?
2133 switch (CC) {
2134 default:
2135 break;
2136 case ISD::SETLT:
2137 case ISD::SETGE:
2138 if ((VT == MVT::i32 && C != 0x80000000 &&
2139 isLegalArithImmed((uint32_t)(C - 1))) ||
2140 (VT == MVT::i64 && C != 0x80000000ULL &&
2141 isLegalArithImmed(C - 1ULL))) {
2142 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2143 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2144 RHS = DAG.getConstant(C, dl, VT);
2145 }
2146 break;
2147 case ISD::SETULT:
2148 case ISD::SETUGE:
2149 if ((VT == MVT::i32 && C != 0 &&
2150 isLegalArithImmed((uint32_t)(C - 1))) ||
2151 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2152 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2153 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2154 RHS = DAG.getConstant(C, dl, VT);
2155 }
2156 break;
2157 case ISD::SETLE:
2158 case ISD::SETGT:
2159 if ((VT == MVT::i32 && C != INT32_MAX &&
2160 isLegalArithImmed((uint32_t)(C + 1))) ||
2161 (VT == MVT::i64 && C != INT64_MAX &&
2162 isLegalArithImmed(C + 1ULL))) {
2163 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2164 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2165 RHS = DAG.getConstant(C, dl, VT);
2166 }
2167 break;
2168 case ISD::SETULE:
2169 case ISD::SETUGT:
2170 if ((VT == MVT::i32 && C != UINT32_MAX &&
2171 isLegalArithImmed((uint32_t)(C + 1))) ||
2172 (VT == MVT::i64 && C != UINT64_MAX &&
2173 isLegalArithImmed(C + 1ULL))) {
2174 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2175 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2176 RHS = DAG.getConstant(C, dl, VT);
2177 }
2178 break;
2179 }
2180 }
2181 }
2182
2183 // Comparisons are canonicalized so that the RHS operand is simpler than the
2184 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2185 // can fold some shift+extend operations on the RHS operand, so swap the
2186 // operands if that can be done.
2187 //
2188 // For example:
2189 // lsl w13, w11, #1
2190 // cmp w13, w12
2191 // can be turned into:
2192 // cmp w12, w11, lsl #1
2193 if (!isa<ConstantSDNode>(RHS) ||
2194 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2195 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2196
2197 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2198 std::swap(LHS, RHS);
2199 CC = ISD::getSetCCSwappedOperands(CC);
2200 }
2201 }
2202
2203 SDValue Cmp;
2204 AArch64CC::CondCode AArch64CC;
2205 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2206 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2207
2208 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2209 // For the i8 operand, the largest immediate is 255, so this can be easily
2210 // encoded in the compare instruction. For the i16 operand, however, the
2211 // largest immediate cannot be encoded in the compare.
2212 // Therefore, use a sign extending load and cmn to avoid materializing the
2213 // -1 constant. For example,
2214 // movz w1, #65535
2215 // ldrh w0, [x0, #0]
2216 // cmp w0, w1
2217 // >
2218 // ldrsh w0, [x0, #0]
2219 // cmn w0, #1
2220 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2221 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2222 // ensure both the LHS and RHS are truly zero extended and to make sure the
2223 // transformation is profitable.
2224 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2225 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2226 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2227 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2228 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2229 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2230 SDValue SExt =
2231 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2232 DAG.getValueType(MVT::i16));
2233 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2234 RHS.getValueType()),
2235 CC, dl, DAG);
2236 AArch64CC = changeIntCCToAArch64CC(CC);
2237 }
2238 }
2239
2240 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2241 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2242 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2243 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2244 }
2245 }
2246 }
2247
2248 if (!Cmp) {
2249 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2250 AArch64CC = changeIntCCToAArch64CC(CC);
2251 }
2252 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2253 return Cmp;
2254}
2255
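The constant-adjustment block inside getAArch64Cmp() above is easiest to see at the source level: a signed strict comparison against a non-encodable constant is rewritten into a non-strict comparison against the neighbouring constant, which is encodable. The two functions below (illustrative, not from this file) compute the same predicate:

#include <cstdint>

// 0x1001 is not a legal AArch64 arithmetic immediate, but 0x1000 is.
bool ltAwkwardImm(int64_t x) { return x <  0x1001; } // SETLT, constant C
bool leLegalImm(int64_t x)   { return x <= 0x1000; } // SETLE, constant C - 1

// For every int64_t x the two results are identical, so the backend prefers
// the second form and avoids materializing 0x1001 into a register.
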
2256static std::pair<SDValue, SDValue>
2257getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2258 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2259 "Unsupported value type");
2260 SDValue Value, Overflow;
2261 SDLoc DL(Op);
2262 SDValue LHS = Op.getOperand(0);
2263 SDValue RHS = Op.getOperand(1);
2264 unsigned Opc = 0;
2265 switch (Op.getOpcode()) {
2266 default:
2267 llvm_unreachable("Unknown overflow instruction!");
2268 case ISD::SADDO:
2269 Opc = AArch64ISD::ADDS;
2270 CC = AArch64CC::VS;
2271 break;
2272 case ISD::UADDO:
2273 Opc = AArch64ISD::ADDS;
2274 CC = AArch64CC::HS;
2275 break;
2276 case ISD::SSUBO:
2277 Opc = AArch64ISD::SUBS;
2278 CC = AArch64CC::VS;
2279 break;
2280 case ISD::USUBO:
2281 Opc = AArch64ISD::SUBS;
2282 CC = AArch64CC::LO;
2283 break;
2284 // Multiply needs a little bit of extra work.
2285 case ISD::SMULO:
2286 case ISD::UMULO: {
2287 CC = AArch64CC::NE;
2288 bool IsSigned = Op.getOpcode() == ISD::SMULO;
2289 if (Op.getValueType() == MVT::i32) {
2290 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2291 // For a 32 bit multiply with overflow check we want the instruction
2292 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2293 // need to generate the following pattern:
2294 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)))
2295 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2296 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2297 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2298 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2299 DAG.getConstant(0, DL, MVT::i64));
2300 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2301 // operation. We need to clear out the upper 32 bits, because we used a
2302 // widening multiply that wrote all 64 bits. In the end this should be a
2303 // noop.
2304 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2305 if (IsSigned) {
2306 // The signed overflow check requires more than just a simple check for
2307 // any bit set in the upper 32 bits of the result. These bits could be
2308 // just the sign bits of a negative number. To perform the overflow
2309 // check we arithmetic-shift the lower 32 bits of the result right by 31
2310 // (replicating its sign bit) and compare that against the upper 32 bits.
2311 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2312 DAG.getConstant(32, DL, MVT::i64));
2313 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2314 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2315 DAG.getConstant(31, DL, MVT::i64));
2316 // It is important that LowerBits is last, otherwise the arithmetic
2317 // shift will not be folded into the compare (SUBS).
2318 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2319 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2320 .getValue(1);
2321 } else {
2322 // The overflow check for unsigned multiply is easy. We only need to
2323 // check if any of the upper 32 bits are set. This can be done with a
2324 // CMP (shifted register). For that we need to generate the following
2325 // pattern:
2326 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2327 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2328 DAG.getConstant(32, DL, MVT::i64));
2329 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2330 Overflow =
2331 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2332 DAG.getConstant(0, DL, MVT::i64),
2333 UpperBits).getValue(1);
2334 }
2335 break;
2336 }
2337 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2338 // For the 64 bit multiply
2339 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2340 if (IsSigned) {
2341 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2342 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2343 DAG.getConstant(63, DL, MVT::i64));
2344 // It is important that LowerBits is last, otherwise the arithmetic
2345 // shift will not be folded into the compare (SUBS).
2346 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2347 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2348 .getValue(1);
2349 } else {
2350 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2351 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2352 Overflow =
2353 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2354 DAG.getConstant(0, DL, MVT::i64),
2355 UpperBits).getValue(1);
2356 }
2357 break;
2358 }
2359 } // switch (...)
2360
2361 if (Opc) {
2362 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2363
2364 // Emit the AArch64 operation with overflow check.
2365 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2366 Overflow = Value.getValue(1);
2367 }
2368 return std::make_pair(Value, Overflow);
2369}
2370
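The SMULO handling above checks signed 32-bit overflow by comparing the upper half of the widened product against the sign-replicated lower half. The same check written as plain scalar C++ (a sketch, not part of this file):

#include <cstdint>

static bool smul32Overflows(int32_t a, int32_t b) {
  int64_t wide = (int64_t)a * (int64_t)b;  // widening multiply (SMULL/SMADDL)
  int32_t lo = (int32_t)wide;              // truncated i32 result
  int32_t hi = (int32_t)(wide >> 32);      // upper 32 bits of the product
  // If no overflow occurred, the upper half must be a pure sign extension of
  // the lower half, i.e. equal to lo >> 31 (arithmetic shift).
  return hi != (lo >> 31);
}

// smul32Overflows(0x40000000, 4) == true, smul32Overflows(-3, 7) == false.
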
2371SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2372 RTLIB::Libcall Call) const {
2373 bool IsStrict = Op->isStrictFPOpcode();
2374 unsigned Offset = IsStrict ? 1 : 0;
2375 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
2376 SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
2377 MakeLibCallOptions CallOptions;
2378 SDValue Result;
2379 SDLoc dl(Op);
2380 std::tie(Result, Chain) = makeLibCall(DAG, Call, Op.getValueType(), Ops,
2381 CallOptions, dl, Chain);
2382 return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
2383}
2384
2385static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2386 SDValue Sel = Op.getOperand(0);
2387 SDValue Other = Op.getOperand(1);
2388 SDLoc dl(Sel);
2389
2390 // If the operand is an overflow checking operation, invert the condition
2391 // code and kill the Not operation. I.e., transform:
2392 // (xor (overflow_op_bool, 1))
2393 // -->
2394 // (csel 1, 0, invert(cc), overflow_op_bool)
2395 // ... which later gets transformed to just a cset instruction with an
2396 // inverted condition code, rather than a cset + eor sequence.
2397 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
2398 // Only lower legal XALUO ops.
2399 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2400 return SDValue();
2401
2402 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2403 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2404 AArch64CC::CondCode CC;
2405 SDValue Value, Overflow;
2406 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2407 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2408 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2409 CCVal, Overflow);
2410 }
2411 // If neither operand is a SELECT_CC, give up.
2412 if (Sel.getOpcode() != ISD::SELECT_CC)
2413 std::swap(Sel, Other);
2414 if (Sel.getOpcode() != ISD::SELECT_CC)
2415 return Op;
2416
2417 // The folding we want to perform is:
2418 // (xor x, (select_cc a, b, cc, 0, -1) )
2419 // -->
2420 // (csel x, (xor x, -1), cc ...)
2421 //
2422 // The latter will get matched to a CSINV instruction.
2423
2424 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2425 SDValue LHS = Sel.getOperand(0);
2426 SDValue RHS = Sel.getOperand(1);
2427 SDValue TVal = Sel.getOperand(2);
2428 SDValue FVal = Sel.getOperand(3);
2429
2430 // FIXME: This could be generalized to non-integer comparisons.
2431 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2432 return Op;
2433
2434 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2435 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2436
2437 // The values aren't constants, this isn't the pattern we're looking for.
2438 if (!CFVal || !CTVal)
2439 return Op;
2440
2441 // We can commute the SELECT_CC by inverting the condition. This
2442 // might be needed to make this fit into a CSINV pattern.
2443 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2444 std::swap(TVal, FVal);
2445 std::swap(CTVal, CFVal);
2446 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2447 }
2448
2449 // If the constants line up, perform the transform!
2450 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2451 SDValue CCVal;
2452 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2453
2454 FVal = Other;
2455 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2456 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2457
2458 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2459 CCVal, Cmp);
2460 }
2461
2462 return Op;
2463}
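// Illustrative sketch (hypothetical helper, not part of the original source):
// the scalar identity behind the CSINV fold above, with a bool standing in
// for the SELECT_CC condition.
static inline uint64_t xorSelectAllOnesSketch(bool Cond, uint64_t X) {
  // (xor X, (select cc, 0, -1)) == (select cc, X, ~X), and the right-hand
  // side matches a single CSINV.
  return Cond ? X : ~X;
}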
2464
2465static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2466 EVT VT = Op.getValueType();
2467
2468 // Let legalize expand this if it isn't a legal type yet.
2469 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2470 return SDValue();
2471
2472 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2473
2474 unsigned Opc;
2475 bool ExtraOp = false;
2476 switch (Op.getOpcode()) {
2477 default:
2478 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2478)
;
2479 case ISD::ADDC:
2480 Opc = AArch64ISD::ADDS;
2481 break;
2482 case ISD::SUBC:
2483 Opc = AArch64ISD::SUBS;
2484 break;
2485 case ISD::ADDE:
2486 Opc = AArch64ISD::ADCS;
2487 ExtraOp = true;
2488 break;
2489 case ISD::SUBE:
2490 Opc = AArch64ISD::SBCS;
2491 ExtraOp = true;
2492 break;
2493 }
2494
2495 if (!ExtraOp)
2496 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2497 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2498 Op.getOperand(2));
2499}
2500
2501static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2502 // Let legalize expand this if it isn't a legal type yet.
2503 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2504 return SDValue();
2505
2506 SDLoc dl(Op);
2507 AArch64CC::CondCode CC;
2508 // The actual operation that sets the overflow or carry flag.
2509 SDValue Value, Overflow;
2510 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2511
2512 // We use 0 and 1 as false and true values.
2513 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2514 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2515
2516 // We use an inverted condition, because the conditional select is inverted
2517 // too. This will allow it to be selected to a single instruction:
2518 // CSINC Wd, WZR, WZR, invert(cond).
2519 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2520 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2521 CCVal, Overflow);
2522
2523 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2524 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2525}
2526
2527// Prefetch operands are:
2528// 1: Address to prefetch
2529// 2: bool isWrite
2530// 3: int locality (0 = no locality ... 3 = extreme locality)
2531// 4: bool isDataCache
2532static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2533 SDLoc DL(Op);
2534 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2535 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2536 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2537
2538 bool IsStream = !Locality;
2539 // When the locality number is set
2540 if (Locality) {
2541 // The front-end should have filtered out the out-of-range values
2542 assert(Locality <= 3 && "Prefetch locality out-of-range");
2544 // The locality degree is the inverse of the cache level, so flip the
2545 // number: the encoding starts at 0 for L1 (highest locality) and ends
2546 // at 2 for L3.
2546 Locality = 3 - Locality;
2547 }
2548
2549 // Build the mask value encoding the expected behavior.
2550 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2551 (!IsData << 3) | // IsDataCache bit
2552 (Locality << 1) | // Cache level bits
2553 (unsigned)IsStream; // Stream bit
2554 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2555 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2556}
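// Worked example of the encoding above (derived only from the bit layout in
// this function): a read prefetch of the data cache with locality 3 has
// IsWrite = 0, !IsData = 0, Locality = 3 - 3 = 0 and IsStream = 0, giving
// PrfOp = 0 (PLDL1KEEP); a streaming write prefetch of the data cache
// (locality 0) has IsWrite = 1 and IsStream = 1, giving
// PrfOp = (1 << 4) | 1 = 17 (PSTL1STRM).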
2557
2558SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2559 SelectionDAG &DAG) const {
2560 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2561
2562 RTLIB::Libcall LC;
2563 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2564
2565 return LowerF128Call(Op, DAG, LC);
2566}
2567
2568SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2569 SelectionDAG &DAG) const {
2570 bool IsStrict = Op->isStrictFPOpcode();
2571 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
2572 if (SrcVal.getValueType() != MVT::f128) {
2573 // It's legal except when f128 is involved
2574 return Op;
2575 }
2576
2577 RTLIB::Libcall LC;
2578 LC = RTLIB::getFPROUND(SrcVal.getValueType(), Op.getValueType());
2579
2580 // FP_ROUND node has a second operand indicating whether it is known to be
2581 // precise. That doesn't take part in the LibCall so we can't directly use
2582 // LowerF128Call.
2583 MakeLibCallOptions CallOptions;
2584 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
2585 SDValue Result;
2586 SDLoc dl(Op);
2587 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
2588 CallOptions, dl, Chain);
2589 return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
2590}
2591
2592SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
2593 SelectionDAG &DAG) const {
2594 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2595 // Any additional optimization in this function should be recorded
2596 // in the cost tables.
2597 EVT InVT = Op.getOperand(0).getValueType();
2598 EVT VT = Op.getValueType();
2599 unsigned NumElts = InVT.getVectorNumElements();
2600
2601 // f16 conversions are promoted to f32 when full fp16 is not supported.
2602 if (InVT.getVectorElementType() == MVT::f16 &&
2603 !Subtarget->hasFullFP16()) {
2604 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2605 SDLoc dl(Op);
2606 return DAG.getNode(
2607 Op.getOpcode(), dl, Op.getValueType(),
2608 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2609 }
2610
2611 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2612 SDLoc dl(Op);
2613 SDValue Cv =
2614 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2615 Op.getOperand(0));
2616 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2617 }
2618
2619 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2620 SDLoc dl(Op);
2621 MVT ExtVT =
2622 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2623 VT.getVectorNumElements());
2624 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2625 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2626 }
2627
2628 // Type changing conversions are illegal.
2629 return Op;
2630}
2631
2632SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2633 SelectionDAG &DAG) const {
2634 bool IsStrict = Op->isStrictFPOpcode();
2635 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
2636
2637 if (SrcVal.getValueType().isVector())
2638 return LowerVectorFP_TO_INT(Op, DAG);
2639
2640 // f16 conversions are promoted to f32 when full fp16 is not supported.
2641 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
2642 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
2643 SDLoc dl(Op);
2644 return DAG.getNode(
2645 Op.getOpcode(), dl, Op.getValueType(),
2646 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
2647 }
2648
2649 if (SrcVal.getValueType() != MVT::f128) {
2650 // It's legal except when f128 is involved
2651 return Op;
2652 }
2653
2654 RTLIB::Libcall LC;
2655 if (Op.getOpcode() == ISD::FP_TO_SINT ||
2656 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
2657 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), Op.getValueType());
2658 else
2659 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), Op.getValueType());
2660
2661 return LowerF128Call(Op, DAG, LC);
2662}
2663
2664static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2665 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2666 // Any additional optimization in this function should be recorded
2667 // in the cost tables.
2668 EVT VT = Op.getValueType();
2669 SDLoc dl(Op);
2670 SDValue In = Op.getOperand(0);
2671 EVT InVT = In.getValueType();
2672
2673 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2674 MVT CastVT =
2675 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2676 InVT.getVectorNumElements());
2677 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2678 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2679 }
2680
2681 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2682 unsigned CastOpc =
2683 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2684 EVT CastVT = VT.changeVectorElementTypeToInteger();
2685 In = DAG.getNode(CastOpc, dl, CastVT, In);
2686 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2687 }
2688
2689 return Op;
2690}
2691
2692SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2693 SelectionDAG &DAG) const {
2694 if (Op.getValueType().isVector())
2695 return LowerVectorINT_TO_FP(Op, DAG);
2696
2697 bool IsStrict = Op->isStrictFPOpcode();
2698 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
2699
2700 // f16 conversions are promoted to f32 when full fp16 is not supported.
2701 if (Op.getValueType() == MVT::f16 &&
2702 !Subtarget->hasFullFP16()) {
2703 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
2704 SDLoc dl(Op);
2705 return DAG.getNode(
2706 ISD::FP_ROUND, dl, MVT::f16,
2707 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
2708 DAG.getIntPtrConstant(0, dl));
2709 }
2710
2711 // i128 conversions are libcalls.
2712 if (SrcVal.getValueType() == MVT::i128)
2713 return SDValue();
2714
2715 // Other conversions are legal, unless it's to the completely software-based
2716 // fp128.
2717 if (Op.getValueType() != MVT::f128)
2718 return Op;
2719
2720 RTLIB::Libcall LC;
2721 if (Op.getOpcode() == ISD::SINT_TO_FP ||
2722 Op.getOpcode() == ISD::STRICT_SINT_TO_FP)
2723 LC = RTLIB::getSINTTOFP(SrcVal.getValueType(), Op.getValueType());
2724 else
2725 LC = RTLIB::getUINTTOFP(SrcVal.getValueType(), Op.getValueType());
2726
2727 return LowerF128Call(Op, DAG, LC);
2728}
2729
2730SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2731 SelectionDAG &DAG) const {
2732 // For iOS, we want to call an alternative entry point: __sincos_stret,
2733 // which returns the values in two S / D registers.
2734 SDLoc dl(Op);
2735 SDValue Arg = Op.getOperand(0);
2736 EVT ArgVT = Arg.getValueType();
2737 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2738
2739 ArgListTy Args;
2740 ArgListEntry Entry;
2741
2742 Entry.Node = Arg;
2743 Entry.Ty = ArgTy;
2744 Entry.IsSExt = false;
2745 Entry.IsZExt = false;
2746 Args.push_back(Entry);
2747
2748 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2749 : RTLIB::SINCOS_STRET_F32;
2750 const char *LibcallName = getLibcallName(LC);
2751 SDValue Callee =
2752 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2753
2754 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2755 TargetLowering::CallLoweringInfo CLI(DAG);
2756 CLI.setDebugLoc(dl)
2757 .setChain(DAG.getEntryNode())
2758 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2759
2760 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2761 return CallResult.first;
2762}
2763
2764static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2765 if (Op.getValueType() != MVT::f16)
2766 return SDValue();
2767
2768 assert(Op.getOperand(0).getValueType() == MVT::i16);
2769 SDLoc DL(Op);
2770
2771 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2772 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2773 return SDValue(
2774 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2775 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2776 0);
2777}
2778
2779static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2780 if (OrigVT.getSizeInBits() >= 64)
2781 return OrigVT;
2782
2783 assert(OrigVT.isSimple() && "Expecting a simple value type");
2784
2785 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2786 switch (OrigSimpleTy) {
2787 default: llvm_unreachable("Unexpected Vector Type");
2788 case MVT::v2i8:
2789 case MVT::v2i16:
2790 return MVT::v2i32;
2791 case MVT::v4i8:
2792 return MVT::v4i16;
2793 }
2794}
2795
2796static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2797 const EVT &OrigTy,
2798 const EVT &ExtTy,
2799 unsigned ExtOpcode) {
2800 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2801 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2802 // 64-bits we need to insert a new extension so that it will be 64-bits.
2803 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2804 if (OrigTy.getSizeInBits() >= 64)
2805 return N;
2806
2807 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2808 EVT NewVT = getExtensionTo64Bits(OrigTy);
2809
2810 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2811}
2812
2813static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2814 bool isSigned) {
2815 EVT VT = N->getValueType(0);
2816
2817 if (N->getOpcode() != ISD::BUILD_VECTOR)
2818 return false;
2819
2820 for (const SDValue &Elt : N->op_values()) {
2821 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2822 unsigned EltSize = VT.getScalarSizeInBits();
2823 unsigned HalfSize = EltSize / 2;
2824 if (isSigned) {
2825 if (!isIntN(HalfSize, C->getSExtValue()))
2826 return false;
2827 } else {
2828 if (!isUIntN(HalfSize, C->getZExtValue()))
2829 return false;
2830 }
2831 continue;
2832 }
2833 return false;
2834 }
2835
2836 return true;
2837}
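// For example, with VT = v4i16 each element is 16 bits and HalfSize is 8, so
// a BUILD_VECTOR of constants counts as sign-extended only if every element
// fits in a signed 8-bit value, and as zero-extended only if every element
// fits in an unsigned 8-bit value.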
2838
2839static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2840 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2841 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2842 N->getOperand(0)->getValueType(0),
2843 N->getValueType(0),
2844 N->getOpcode());
2845
2846 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2847 EVT VT = N->getValueType(0);
2848 SDLoc dl(N);
2849 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2850 unsigned NumElts = VT.getVectorNumElements();
2851 MVT TruncVT = MVT::getIntegerVT(EltSize);
2852 SmallVector<SDValue, 8> Ops;
2853 for (unsigned i = 0; i != NumElts; ++i) {
2854 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2855 const APInt &CInt = C->getAPIntValue();
2856 // Element types smaller than 32 bits are not legal, so use i32 elements.
2857 // The values are implicitly truncated so sext vs. zext doesn't matter.
2858 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2859 }
2860 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2861}
2862
2863static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2864 return N->getOpcode() == ISD::SIGN_EXTEND ||
2865 isExtendedBUILD_VECTOR(N, DAG, true);
2866}
2867
2868static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2869 return N->getOpcode() == ISD::ZERO_EXTEND ||
2870 isExtendedBUILD_VECTOR(N, DAG, false);
2871}
2872
2873static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2874 unsigned Opcode = N->getOpcode();
2875 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2876 SDNode *N0 = N->getOperand(0).getNode();
2877 SDNode *N1 = N->getOperand(1).getNode();
2878 return N0->hasOneUse() && N1->hasOneUse() &&
2879 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2880 }
2881 return false;
2882}
2883
2884static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2885 unsigned Opcode = N->getOpcode();
2886 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2887 SDNode *N0 = N->getOperand(0).getNode();
2888 SDNode *N1 = N->getOperand(1).getNode();
2889 return N0->hasOneUse() && N1->hasOneUse() &&
2890 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2891 }
2892 return false;
2893}
2894
2895SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2896 SelectionDAG &DAG) const {
2897 // The rounding mode is in bits 23:22 of the FPCR.
2898 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2899 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
2900 // so that the shift + and get folded into a bitfield extract.
2901 SDLoc dl(Op);
2902
2903 SDValue Chain = Op.getOperand(0);
2904 SDValue FPCR_64 = DAG.getNode(
2905 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
2906 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
2907 Chain = FPCR_64.getValue(1);
2908 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2909 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2910 DAG.getConstant(1U << 22, dl, MVT::i32));
2911 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2912 DAG.getConstant(22, dl, MVT::i32));
2913 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2914 DAG.getConstant(3, dl, MVT::i32));
2915 return DAG.getMergeValues({AND, Chain}, dl);
2916}
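// Worked through for the two-bit rounding-mode field RM in FPCR bits 23:22,
// the formula collapses to ((RM + 1) & 3): adding 1 << 22 increments the
// field, the shift by 22 discards the bits below it, and the mask drops any
// higher bits (including a carry out of the field), so RM = 0, 1, 2, 3 maps
// to FLT_ROUNDS = 1, 2, 3, 0 as described above.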
2917
2918static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2919 // Multiplications are only custom-lowered for 128-bit vectors so that
2920 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2921 EVT VT = Op.getValueType();
2922 assert(VT.is128BitVector() && VT.isInteger() &&
2923        "unexpected type for custom-lowering ISD::MUL");
2924 SDNode *N0 = Op.getOperand(0).getNode();
2925 SDNode *N1 = Op.getOperand(1).getNode();
2926 unsigned NewOpc = 0;
2927 bool isMLA = false;
2928 bool isN0SExt = isSignExtended(N0, DAG);
2929 bool isN1SExt = isSignExtended(N1, DAG);
2930 if (isN0SExt && isN1SExt)
2931 NewOpc = AArch64ISD::SMULL;
2932 else {
2933 bool isN0ZExt = isZeroExtended(N0, DAG);
2934 bool isN1ZExt = isZeroExtended(N1, DAG);
2935 if (isN0ZExt && isN1ZExt)
2936 NewOpc = AArch64ISD::UMULL;
2937 else if (isN1SExt || isN1ZExt) {
2938 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2939 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2940 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2941 NewOpc = AArch64ISD::SMULL;
2942 isMLA = true;
2943 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2944 NewOpc = AArch64ISD::UMULL;
2945 isMLA = true;
2946 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2947 std::swap(N0, N1);
2948 NewOpc = AArch64ISD::UMULL;
2949 isMLA = true;
2950 }
2951 }
2952
2953 if (!NewOpc) {
2954 if (VT == MVT::v2i64)
2955 // Fall through to expand this. It is not legal.
2956 return SDValue();
2957 else
2958 // Other vector multiplications are legal.
2959 return Op;
2960 }
2961 }
2962
2963 // Legalize to an S/UMULL instruction
2964 SDLoc DL(Op);
2965 SDValue Op0;
2966 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2967 if (!isMLA) {
2968 Op0 = skipExtensionForVectorMULL(N0, DAG);
2969 assert(Op0.getValueType().is64BitVector() &&
2970        Op1.getValueType().is64BitVector() &&
2971        "unexpected types for extended operands to VMULL");
2972 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2973 }
2974 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2975 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
2976 // This pays off on CPUs with accumulate forwarding such as Cortex-A53/A57.
2977 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2978 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2979 EVT Op1VT = Op1.getValueType();
2980 return DAG.getNode(N0->getOpcode(), DL, VT,
2981 DAG.getNode(NewOpc, DL, VT,
2982 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2983 DAG.getNode(NewOpc, DL, VT,
2984 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2985}
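// Illustrative sketch (hypothetical helper, assumes <cstdint>; not part of
// the original source): the distributivity the MLA path relies on, with
// scalars standing in for the widened vector lanes. The predicate is
// identically true because both sides agree modulo 2^64.
static inline bool mlaRewriteHoldsSketch(uint32_t A, uint32_t B, uint32_t C) {
  // (zext A + zext B) * zext C == (zext A * zext C) + (zext B * zext C)
  return (uint64_t(A) + B) * C == uint64_t(A) * C + uint64_t(B) * C;
}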
2986
2987static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
2988 int Pattern) {
2989 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
2990 DAG.getTargetConstant(Pattern, DL, MVT::i32));
2991}
2992
2993SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2994 SelectionDAG &DAG) const {
2995 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2996 SDLoc dl(Op);
2997 switch (IntNo) {
2998 default: return SDValue(); // Don't custom lower most intrinsics.
2999 case Intrinsic::thread_pointer: {
3000 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3001 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3002 }
3003 case Intrinsic::aarch64_neon_abs: {
3004 EVT Ty = Op.getValueType();
3005 if (Ty == MVT::i64) {
3006 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3007 Op.getOperand(1));
3008 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3009 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3010 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3011 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3012 } else {
3013 report_fatal_error("Unexpected type for AArch64 NEON intrinic");
3014 }
3015 }
3016 case Intrinsic::aarch64_neon_smax:
3017 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3018 Op.getOperand(1), Op.getOperand(2));
3019 case Intrinsic::aarch64_neon_umax:
3020 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3021 Op.getOperand(1), Op.getOperand(2));
3022 case Intrinsic::aarch64_neon_smin:
3023 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3024 Op.getOperand(1), Op.getOperand(2));
3025 case Intrinsic::aarch64_neon_umin:
3026 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3027 Op.getOperand(1), Op.getOperand(2));
3028
3029 case Intrinsic::aarch64_sve_sunpkhi:
3030 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3031 Op.getOperand(1));
3032 case Intrinsic::aarch64_sve_sunpklo:
3033 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3034 Op.getOperand(1));
3035 case Intrinsic::aarch64_sve_uunpkhi:
3036 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3037 Op.getOperand(1));
3038 case Intrinsic::aarch64_sve_uunpklo:
3039 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3040 Op.getOperand(1));
3041 case Intrinsic::aarch64_sve_clasta_n:
3042 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3043 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3044 case Intrinsic::aarch64_sve_clastb_n:
3045 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3046 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3047 case Intrinsic::aarch64_sve_lasta:
3048 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3049 Op.getOperand(1), Op.getOperand(2));
3050 case Intrinsic::aarch64_sve_lastb:
3051 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3052 Op.getOperand(1), Op.getOperand(2));
3053 case Intrinsic::aarch64_sve_rev:
3054 return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
3055 Op.getOperand(1));
3056 case Intrinsic::aarch64_sve_tbl:
3057 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3058 Op.getOperand(1), Op.getOperand(2));
3059 case Intrinsic::aarch64_sve_trn1:
3060 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3061 Op.getOperand(1), Op.getOperand(2));
3062 case Intrinsic::aarch64_sve_trn2:
3063 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3064 Op.getOperand(1), Op.getOperand(2));
3065 case Intrinsic::aarch64_sve_uzp1:
3066 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3067 Op.getOperand(1), Op.getOperand(2));
3068 case Intrinsic::aarch64_sve_uzp2:
3069 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3070 Op.getOperand(1), Op.getOperand(2));
3071 case Intrinsic::aarch64_sve_zip1:
3072 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
3073 Op.getOperand(1), Op.getOperand(2));
3074 case Intrinsic::aarch64_sve_zip2:
3075 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
3076 Op.getOperand(1), Op.getOperand(2));
3077 case Intrinsic::aarch64_sve_ptrue:
3078 return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
3079 Op.getOperand(1));
3080 case Intrinsic::aarch64_sve_dupq_lane:
3081 return LowerDUPQLane(Op, DAG);
3082 case Intrinsic::aarch64_sve_convert_from_svbool:
3083 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
3084 Op.getOperand(1));
3085 case Intrinsic::aarch64_sve_convert_to_svbool: {
3086 EVT OutVT = Op.getValueType();
3087 EVT InVT = Op.getOperand(1).getValueType();
3088 // Return the operand if the cast isn't changing type,
3089 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3090 if (InVT == OutVT)
3091 return Op.getOperand(1);
3092 // Otherwise, zero the newly introduced lanes.
3093 SDValue Reinterpret =
3094 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
3095 SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
3096 SDValue MaskReinterpret =
3097 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
3098 return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
3099 }
3100
3101 case Intrinsic::aarch64_sve_insr: {
3102 SDValue Scalar = Op.getOperand(2);
3103 EVT ScalarTy = Scalar.getValueType();
3104 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
3105 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
3106
3107 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
3108 Op.getOperand(1), Scalar);
3109 }
3110
3111 case Intrinsic::localaddress: {
3112 const auto &MF = DAG.getMachineFunction();
3113 const auto *RegInfo = Subtarget->getRegisterInfo();
3114 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
3115 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
3116 Op.getSimpleValueType());
3117 }
3118
3119 case Intrinsic::eh_recoverfp: {
3120 // FIXME: This needs to be implemented to correctly handle highly aligned
3121 // stack objects. For now we simply return the incoming FP. Refer D53541
3122 // for more details.
3123 SDValue FnOp = Op.getOperand(1);
3124 SDValue IncomingFPOp = Op.getOperand(2);
3125 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
3126 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
3127 if (!Fn)
3128 report_fatal_error(
3129 "llvm.eh.recoverfp must take a function as the first argument");
3130 return IncomingFPOp;
3131 }
3132 }
3133}
3134
3135bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3136 return ExtVal.getValueType().isScalableVector();
3137}
3138
3139// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
3140static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
3141 EVT VT, EVT MemVT,
3142 SelectionDAG &DAG) {
3143 assert(VT.isVector() && "VT should be a vector type");
3144 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
3145
3146 SDValue Value = ST->getValue();
3147
3148 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and
3149 // extracts the word lane which represents the v4i8 subvector. It optimizes
3150 // the store to:
3151 //
3152 // xtn v0.8b, v0.8h
3153 // str s0, [x0]
3154
3155 SDValue Undef = DAG.getUNDEF(MVT::i16);
3156 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
3157 {Undef, Undef, Undef, Undef});
3158
3159 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
3160 Value, UndefVec);
3161 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
3162
3163 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
3164 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3165 Trunc, DAG.getConstant(0, DL, MVT::i64));
3166
3167 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
3168 ST->getBasePtr(), ST->getMemOperand());
3169}
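// Concretely, on a little-endian target a promoted value <a, b, c, d> : v4i16
// is concatenated with undef into a v8i16 whose low half is <a, b, c, d>, the
// truncate keeps the low byte of each lane, and lane 0 of the v2i32 bitcast
// is exactly those four bytes, i.e. the v4i8 payload stored by "str s0".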
3170
3171 // Custom lowering for any store, vector or scalar, truncating or not.
3172 // Currently we only custom-lower truncating stores from v4i16 to v4i8 and
3173 // volatile stores of i128.
3174SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
3175 SelectionDAG &DAG) const {
3176 SDLoc Dl(Op);
3177 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
3178 assert(StoreNode && "Can only custom lower store nodes");
3179
3180 SDValue Value = StoreNode->getValue();
3181
3182 EVT VT = Value.getValueType();
3183 EVT MemVT = StoreNode->getMemoryVT();
3184
3185 if (VT.isVector()) {
3186 unsigned AS = StoreNode->getAddressSpace();
3187 unsigned Align = StoreNode->getAlignment();
3188 if (Align < MemVT.getStoreSize() &&
3189 !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
3190 StoreNode->getMemOperand()->getFlags(),
3191 nullptr)) {
3192 return scalarizeVectorStore(StoreNode, DAG);
3193 }
3194
3195 if (StoreNode->isTruncatingStore()) {
3196 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
3197 }
3198 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
3199 // the custom lowering, as there are no un-paired non-temporal stores and
3200 // legalization will break up 256 bit inputs.
3201 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
3202 MemVT.getVectorElementCount().Min % 2u == 0 &&
3203 ((MemVT.getScalarSizeInBits() == 8u ||
3204 MemVT.getScalarSizeInBits() == 16u ||
3205 MemVT.getScalarSizeInBits() == 32u ||
3206 MemVT.getScalarSizeInBits() == 64u))) {
3207 SDValue Lo =
3208 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
3209 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
3210 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
3211 SDValue Hi = DAG.getNode(
3212 ISD::EXTRACT_SUBVECTOR, Dl,
3213 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
3214 StoreNode->getValue(),
3215 DAG.getConstant(MemVT.getVectorElementCount().Min / 2, Dl, MVT::i64));
3216 SDValue Result = DAG.getMemIntrinsicNode(
3217 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
3218 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
3219 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
3220 return Result;
3221 }
3222 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
3223 assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
3224 SDValue Lo =
3225 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3226 DAG.getConstant(0, Dl, MVT::i64));
3227 SDValue Hi =
3228 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3229 DAG.getConstant(1, Dl, MVT::i64));
3230 SDValue Result = DAG.getMemIntrinsicNode(
3231 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
3232 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
3233 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
3234 return Result;
3235 }
3236
3237 return SDValue();
3238}
3239
3240SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
3241 SelectionDAG &DAG) const {
3242 LLVM_DEBUG(dbgs() << "Custom lowering: ");
3243 LLVM_DEBUG(Op.dump());
3244
3245 switch (Op.getOpcode()) {
3246 default:
3247 llvm_unreachable("unimplemented operand")::llvm::llvm_unreachable_internal("unimplemented operand", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3247)
;
3248 return SDValue();
3249 case ISD::BITCAST:
3250 return LowerBITCAST(Op, DAG);
3251 case ISD::GlobalAddress:
3252 return LowerGlobalAddress(Op, DAG);
3253 case ISD::GlobalTLSAddress:
3254 return LowerGlobalTLSAddress(Op, DAG);
3255 case ISD::SETCC:
3256 case ISD::STRICT_FSETCC:
3257 case ISD::STRICT_FSETCCS:
3258 return LowerSETCC(Op, DAG);
3259 case ISD::BR_CC:
3260 return LowerBR_CC(Op, DAG);
3261 case ISD::SELECT:
3262 return LowerSELECT(Op, DAG);
3263 case ISD::SELECT_CC:
3264 return LowerSELECT_CC(Op, DAG);
3265 case ISD::JumpTable:
3266 return LowerJumpTable(Op, DAG);
3267 case ISD::BR_JT:
3268 return LowerBR_JT(Op, DAG);
3269 case ISD::ConstantPool:
3270 return LowerConstantPool(Op, DAG);
3271 case ISD::BlockAddress:
3272 return LowerBlockAddress(Op, DAG);
3273 case ISD::VASTART:
3274 return LowerVASTART(Op, DAG);
3275 case ISD::VACOPY:
3276 return LowerVACOPY(Op, DAG);
3277 case ISD::VAARG:
3278 return LowerVAARG(Op, DAG);
3279 case ISD::ADDC:
3280 case ISD::ADDE:
3281 case ISD::SUBC:
3282 case ISD::SUBE:
3283 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
3284 case ISD::SADDO:
3285 case ISD::UADDO:
3286 case ISD::SSUBO:
3287 case ISD::USUBO:
3288 case ISD::SMULO:
3289 case ISD::UMULO:
3290 return LowerXALUO(Op, DAG);
3291 case ISD::FADD:
3292 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
3293 case ISD::FSUB:
3294 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
3295 case ISD::FMUL:
3296 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
3297 case ISD::FDIV:
3298 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
3299 case ISD::FP_ROUND:
3300 case ISD::STRICT_FP_ROUND:
3301 return LowerFP_ROUND(Op, DAG);
3302 case ISD::FP_EXTEND:
3303 return LowerFP_EXTEND(Op, DAG);
3304 case ISD::FRAMEADDR:
3305 return LowerFRAMEADDR(Op, DAG);
3306 case ISD::SPONENTRY:
3307 return LowerSPONENTRY(Op, DAG);
3308 case ISD::RETURNADDR:
3309 return LowerRETURNADDR(Op, DAG);
3310 case ISD::ADDROFRETURNADDR:
3311 return LowerADDROFRETURNADDR(Op, DAG);
3312 case ISD::INSERT_VECTOR_ELT:
3313 return LowerINSERT_VECTOR_ELT(Op, DAG);
3314 case ISD::EXTRACT_VECTOR_ELT:
3315 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3316 case ISD::BUILD_VECTOR:
3317 return LowerBUILD_VECTOR(Op, DAG);
3318 case ISD::VECTOR_SHUFFLE:
3319 return LowerVECTOR_SHUFFLE(Op, DAG);
3320 case ISD::SPLAT_VECTOR:
3321 return LowerSPLAT_VECTOR(Op, DAG);
3322 case ISD::EXTRACT_SUBVECTOR:
3323 return LowerEXTRACT_SUBVECTOR(Op, DAG);
3324 case ISD::SRA:
3325 case ISD::SRL:
3326 case ISD::SHL:
3327 return LowerVectorSRA_SRL_SHL(Op, DAG);
3328 case ISD::SHL_PARTS:
3329 return LowerShiftLeftParts(Op, DAG);
3330 case ISD::SRL_PARTS:
3331 case ISD::SRA_PARTS:
3332 return LowerShiftRightParts(Op, DAG);
3333 case ISD::CTPOP:
3334 return LowerCTPOP(Op, DAG);
3335 case ISD::FCOPYSIGN:
3336 return LowerFCOPYSIGN(Op, DAG);
3337 case ISD::OR:
3338 return LowerVectorOR(Op, DAG);
3339 case ISD::XOR:
3340 return LowerXOR(Op, DAG);
3341 case ISD::PREFETCH:
3342 return LowerPREFETCH(Op, DAG);
3343 case ISD::SINT_TO_FP:
3344 case ISD::UINT_TO_FP:
3345 case ISD::STRICT_SINT_TO_FP:
3346 case ISD::STRICT_UINT_TO_FP:
3347 return LowerINT_TO_FP(Op, DAG);
3348 case ISD::FP_TO_SINT:
3349 case ISD::FP_TO_UINT:
3350 case ISD::STRICT_FP_TO_SINT:
3351 case ISD::STRICT_FP_TO_UINT:
3352 return LowerFP_TO_INT(Op, DAG);
3353 case ISD::FSINCOS:
3354 return LowerFSINCOS(Op, DAG);
3355 case ISD::FLT_ROUNDS_:
3356 return LowerFLT_ROUNDS_(Op, DAG);
3357 case ISD::MUL:
3358 return LowerMUL(Op, DAG);
3359 case ISD::INTRINSIC_WO_CHAIN:
3360 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3361 case ISD::STORE:
3362 return LowerSTORE(Op, DAG);
3363 case ISD::VECREDUCE_ADD:
3364 case ISD::VECREDUCE_SMAX:
3365 case ISD::VECREDUCE_SMIN:
3366 case ISD::VECREDUCE_UMAX:
3367 case ISD::VECREDUCE_UMIN:
3368 case ISD::VECREDUCE_FMAX:
3369 case ISD::VECREDUCE_FMIN:
3370 return LowerVECREDUCE(Op, DAG);
3371 case ISD::ATOMIC_LOAD_SUB:
3372 return LowerATOMIC_LOAD_SUB(Op, DAG);
3373 case ISD::ATOMIC_LOAD_AND:
3374 return LowerATOMIC_LOAD_AND(Op, DAG);
3375 case ISD::DYNAMIC_STACKALLOC:
3376 return LowerDYNAMIC_STACKALLOC(Op, DAG);
3377 case ISD::VSCALE:
3378 return LowerVSCALE(Op, DAG);
3379 }
3380}
3381
3382//===----------------------------------------------------------------------===//
3383// Calling Convention Implementation
3384//===----------------------------------------------------------------------===//
3385
3386/// Selects the correct CCAssignFn for a given CallingConvention value.
3387CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
3388 bool IsVarArg) const {
3389 switch (CC) {
3390 default:
3391 report_fatal_error("Unsupported calling convention.");
3392 case CallingConv::WebKit_JS:
3393 return CC_AArch64_WebKit_JS;
3394 case CallingConv::GHC:
3395 return CC_AArch64_GHC;
3396 case CallingConv::C:
3397 case CallingConv::Fast:
3398 case CallingConv::PreserveMost:
3399 case CallingConv::CXX_FAST_TLS:
3400 case CallingConv::Swift:
3401 if (Subtarget->isTargetWindows() && IsVarArg)
3402 return CC_AArch64_Win64_VarArg;
3403 if (!Subtarget->isTargetDarwin())
3404 return CC_AArch64_AAPCS;
3405 if (!IsVarArg)
3406 return CC_AArch64_DarwinPCS;
3407 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
3408 : CC_AArch64_DarwinPCS_VarArg;
3409 case CallingConv::Win64:
3410 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
3411 case CallingConv::CFGuard_Check:
3412 return CC_AArch64_Win64_CFGuard_Check;
3413 case CallingConv::AArch64_VectorCall:
3414 case CallingConv::AArch64_SVE_VectorCall:
3415 return CC_AArch64_AAPCS;
3416 }
3417}
3418
3419CCAssignFn *
3420AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
3421 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3422 : RetCC_AArch64_AAPCS;
3423}
3424
3425SDValue AArch64TargetLowering::LowerFormalArguments(
3426 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3427 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3428 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3429 MachineFunction &MF = DAG.getMachineFunction();
3430 MachineFrameInfo &MFI = MF.getFrameInfo();
3431 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3432
3433 // Assign locations to all of the incoming arguments.
3434 SmallVector<CCValAssign, 16> ArgLocs;
3435 DenseMap<unsigned, SDValue> CopiedRegs;
3436 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3437 *DAG.getContext());
3438
3439 // At this point, Ins[].VT may already be promoted to i32. To correctly
3440 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3441 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3442 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
3443 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
3444 // LocVT.
3445 unsigned NumArgs = Ins.size();
3446 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3447 unsigned CurArgIdx = 0;
3448 for (unsigned i = 0; i != NumArgs; ++i) {
3449 MVT ValVT = Ins[i].VT;
3450 if (Ins[i].isOrigArg()) {
3451 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
3452 CurArgIdx = Ins[i].getOrigArgIndex();
3453
3454 // Get type of the original argument.
3455 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3456 /*AllowUnknown*/ true);
3457 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3458 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3459 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3460 ValVT = MVT::i8;
3461 else if (ActualMVT == MVT::i16)
3462 ValVT = MVT::i16;
3463 }
3464 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3465 bool Res =
3466 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3467 assert(!Res && "Call operand has unhandled type")((!Res && "Call operand has unhandled type") ? static_cast
<void> (0) : __assert_fail ("!Res && \"Call operand has unhandled type\""
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3467, __PRETTY_FUNCTION__))
;
3468 (void)Res;
3469 }
3470 assert(ArgLocs.size() == Ins.size());
3471 SmallVector<SDValue, 16> ArgValues;
3472 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3473 CCValAssign &VA = ArgLocs[i];
3474
3475 if (Ins[i].Flags.isByVal()) {
3476 // Byval is used for HFAs in the PCS, but the system should work in a
3477 // non-compliant manner for larger structs.
3478 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3479 int Size = Ins[i].Flags.getByValSize();
3480 unsigned NumRegs = (Size + 7) / 8;
3481
3482 // FIXME: This works on big-endian for composite byvals, which are the common
3483 // case. It should also work for fundamental types too.
3484 unsigned FrameIdx =
3485 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3486 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3487 InVals.push_back(FrameIdxN);
3488
3489 continue;
3490 }
3491
3492 SDValue ArgValue;
3493 if (VA.isRegLoc()) {
3494 // Arguments stored in registers.
3495 EVT RegVT = VA.getLocVT();
3496 const TargetRegisterClass *RC;
3497
3498 if (RegVT == MVT::i32)
3499 RC = &AArch64::GPR32RegClass;
3500 else if (RegVT == MVT::i64)
3501 RC = &AArch64::GPR64RegClass;
3502 else if (RegVT == MVT::f16)
3503 RC = &AArch64::FPR16RegClass;
3504 else if (RegVT == MVT::f32)
3505 RC = &AArch64::FPR32RegClass;
3506 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3507 RC = &AArch64::FPR64RegClass;
3508 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3509 RC = &AArch64::FPR128RegClass;
3510 else if (RegVT.isScalableVector() &&
3511 RegVT.getVectorElementType() == MVT::i1)
3512 RC = &AArch64::PPRRegClass;
3513 else if (RegVT.isScalableVector())
3514 RC = &AArch64::ZPRRegClass;
3515 else
3516 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3516)
;
3517
3518 // Transform the arguments in physical registers into virtual ones.
3519 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3520 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3521
3522 // If this is an 8, 16 or 32-bit value, it is really passed promoted
3523 // to 64 bits. Insert an assert[sz]ext to capture this, then
3524 // truncate to the right size.
3525 switch (VA.getLocInfo()) {
3526 default:
3527 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3527)
;
3528 case CCValAssign::Full:
3529 break;
3530 case CCValAssign::Indirect:
3531 assert(VA.getValVT().isScalableVector() &&
3532        "Only scalable vectors can be passed indirectly");
3533 break;
3534 case CCValAssign::BCvt:
3535 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3536 break;
3537 case CCValAssign::AExt:
3538 case CCValAssign::SExt:
3539 case CCValAssign::ZExt:
3540 break;
3541 case CCValAssign::AExtUpper:
3542 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
3543 DAG.getConstant(32, DL, RegVT));
3544 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
3545 break;
3546 }
3547 } else { // VA.isRegLoc()
3548 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3549 unsigned ArgOffset = VA.getLocMemOffset();
3550 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
3551 ? VA.getLocVT().getSizeInBits()
3552 : VA.getValVT().getSizeInBits()) / 8;
3553
3554 uint32_t BEAlign = 0;
3555 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3556 !Ins[i].Flags.isInConsecutiveRegs())
3557 BEAlign = 8 - ArgSize;
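// For example, on a big-endian target an i32 stack argument effectively sits
// in the high-address half of its 8-byte slot, so BEAlign = 8 - 4 = 4 points
// the load at the bytes that actually hold the value (an assumed illustration
// of the adjustment, not an exhaustive ABI statement).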
3558
3559 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3560
3561 // Create load nodes to retrieve arguments from the stack.
3562 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3563
3564 // For NON_EXTLOAD, the generic code in getLoad asserts that ValVT == MemVT.
3565 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3566 MVT MemVT = VA.getValVT();
3567
3568 switch (VA.getLocInfo()) {
3569 default:
3570 break;
3571 case CCValAssign::Trunc:
3572 case CCValAssign::BCvt:
3573 MemVT = VA.getLocVT();
3574 break;
3575 case CCValAssign::Indirect:
3576 assert(VA.getValVT().isScalableVector() &&
3577        "Only scalable vectors can be passed indirectly");
3578 MemVT = VA.getLocVT();
3579 break;
3580 case CCValAssign::SExt:
3581 ExtType = ISD::SEXTLOAD;
3582 break;
3583 case CCValAssign::ZExt:
3584 ExtType = ISD::ZEXTLOAD;
3585 break;
3586 case CCValAssign::AExt:
3587 ExtType = ISD::EXTLOAD;
3588 break;
3589 }
3590
3591 ArgValue = DAG.getExtLoad(
3592 ExtType, DL, VA.getLocVT(), Chain, FIN,
3593 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3594 MemVT);
3595
3596 }
3597
3598 if (VA.getLocInfo() == CCValAssign::Indirect) {
3599 assert(VA.getValVT().isScalableVector() &&
3600        "Only scalable vectors can be passed indirectly");
3601 // If value is passed via pointer - do a load.
3602 ArgValue =
3603 DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo());
3604 }
3605
3606 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
3607 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
3608 ArgValue, DAG.getValueType(MVT::i32));
3609 InVals.push_back(ArgValue);
3610 }
3611
3612 // varargs
3613 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3614 if (isVarArg) {
3615 if (!Subtarget->isTargetDarwin() || IsWin64) {
3616 // The AAPCS variadic function ABI is identical to the non-variadic
3617 // one. As a result there may be more arguments in registers and we should
3618 // save them for future reference.
3619 // Win64 variadic functions also pass arguments in registers, but all float
3620 // arguments are passed in integer registers.
3621 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3622 }
3623
3624 // This will point to the next argument passed via stack.
3625 unsigned StackOffset = CCInfo.getNextStackOffset();
3626 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
3627 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
3628 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3629
3630 if (MFI.hasMustTailInVarArgFunc()) {
3631 SmallVector<MVT, 2> RegParmTypes;
3632 RegParmTypes.push_back(MVT::i64);
3633 RegParmTypes.push_back(MVT::f128);
3634 // Compute the set of forwarded registers. The rest are scratch.
3635 SmallVectorImpl<ForwardedRegister> &Forwards =
3636 FuncInfo->getForwardedMustTailRegParms();
3637 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
3638 CC_AArch64_AAPCS);
3639
3640 // Conservatively forward X8, since it might be used for aggregate return.
3641 if (!CCInfo.isAllocated(AArch64::X8)) {
3642 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
3643 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
3644 }
3645 }
3646 }
3647
3648 // On Windows, InReg pointers must be returned, so record the pointer in a
3649 // virtual register at the start of the function so it can be returned in the
3650 // epilogue.
3651 if (IsWin64) {
3652 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3653 if (Ins[I].Flags.isInReg()) {
3654 assert(!FuncInfo->getSRetReturnReg());
3655
3656 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3657 Register Reg =
3658 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3659 FuncInfo->setSRetReturnReg(Reg);
3660
3661 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
3662 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
3663 break;
3664 }
3665 }
3666 }
3667
3668 unsigned StackArgSize = CCInfo.getNextStackOffset();
3669 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3670 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3671 // This is a non-standard ABI so by fiat I say we're allowed to make full
3672 // use of the stack area to be popped, which must be aligned to 16 bytes in
3673 // any case:
3674 StackArgSize = alignTo(StackArgSize, 16);
3675
3676 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3677 // a multiple of 16.
3678 FuncInfo->setArgumentStackToRestore(StackArgSize);
3679
3680 // This realignment carries over to the available bytes below. Our own
3681 // callers will guarantee the space is free by giving an aligned value to
3682 // CALLSEQ_START.
3683 }
3684 // Even if we're not expected to free up the space, it's useful to know how
3685 // much is there while considering tail calls (because we can reuse it).
3686 FuncInfo->setBytesInStackArgArea(StackArgSize);
3687
3688 if (Subtarget->hasCustomCallingConv())
3689 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3690
3691 return Chain;
3692}
3693
3694void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3695 SelectionDAG &DAG,
3696 const SDLoc &DL,
3697 SDValue &Chain) const {
3698 MachineFunction &MF = DAG.getMachineFunction();
3699 MachineFrameInfo &MFI = MF.getFrameInfo();
3700 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3701 auto PtrVT = getPointerTy(DAG.getDataLayout());
3702 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3703
3704 SmallVector<SDValue, 8> MemOps;
3705
3706 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3707 AArch64::X3, AArch64::X4, AArch64::X5,
3708 AArch64::X6, AArch64::X7 };
3709 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3710 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3711
3712 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3713 int GPRIdx = 0;
3714 if (GPRSaveSize != 0) {
3715 if (IsWin64) {
3716 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3717 if (GPRSaveSize & 15)
3718 // The extra size here, if triggered, will always be 8.
3719 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3720 } else
3721 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3722
3723 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3724
3725 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3726 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3727 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3728 SDValue Store = DAG.getStore(
3729 Val.getValue(1), DL, Val, FIN,
3730 IsWin64
3731 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3732 GPRIdx,
3733 (i - FirstVariadicGPR) * 8)
3734 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3735 MemOps.push_back(Store);
3736 FIN =
3737 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3738 }
3739 }
3740 FuncInfo->setVarArgsGPRIndex(GPRIdx);
3741 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3742
3743 if (Subtarget->hasFPARMv8() && !IsWin64) {
3744 static const MCPhysReg FPRArgRegs[] = {
3745 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3746 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3747 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3748 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3749
3750 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3751 int FPRIdx = 0;
3752 if (FPRSaveSize != 0) {
3753 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3754
3755 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3756
3757 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3758 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3759 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3760
3761 SDValue Store = DAG.getStore(
3762 Val.getValue(1), DL, Val, FIN,
3763 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3764 MemOps.push_back(Store);
3765 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3766 DAG.getConstant(16, DL, PtrVT));
3767 }
3768 }
3769 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3770 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3771 }
3772
3773 if (!MemOps.empty()) {
3774 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3775 }
3776}
3777
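// Illustrative sketch, not part of this file: saveVarArgRegisters above
// reserves 8 bytes for every still-unallocated X argument register and 16
// bytes for every still-unallocated Q argument register. A minimal
// standalone model of that sizing, assuming the AAPCS counts of 8 GPR and
// 8 FPR argument registers:
static unsigned sketchGPRSaveSize(unsigned FirstVariadicGPR) {
  return 8 * (8 - FirstVariadicGPR); // X0..X7, 8 bytes each
}
static unsigned sketchFPRSaveSize(unsigned FirstVariadicFPR) {
  return 16 * (8 - FirstVariadicFPR); // Q0..Q7, 16 bytes each
}
// E.g. a variadic callee with two fixed integer and one fixed FP argument
// gets sketchGPRSaveSize(2) == 48 and sketchFPRSaveSize(1) == 112 bytes.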
3778/// LowerCallResult - Lower the result values of a call into the
3779/// appropriate copies out of appropriate physical registers.
3780SDValue AArch64TargetLowering::LowerCallResult(
3781 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3782 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3783 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3784 SDValue ThisVal) const {
3785 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3786 ? RetCC_AArch64_WebKit_JS
3787 : RetCC_AArch64_AAPCS;
3788 // Assign locations to each value returned by this call.
3789 SmallVector<CCValAssign, 16> RVLocs;
3790 DenseMap<unsigned, SDValue> CopiedRegs;
3791 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3792 *DAG.getContext());
3793 CCInfo.AnalyzeCallResult(Ins, RetCC);
3794
3795 // Copy all of the result registers out of their specified physreg.
3796 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3797 CCValAssign VA = RVLocs[i];
3798
3799 // Pass 'this' value directly from the argument to return value, to avoid
3800 // reg unit interference
3801 if (i == 0 && isThisReturn) {
3802 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3803 "unexpected return calling convention register assignment");
3804 InVals.push_back(ThisVal);
3805 continue;
3806 }
3807
3808 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
3809 // allows one use of a physreg per block.
3810 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
3811 if (!Val) {
3812 Val =
3813 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3814 Chain = Val.getValue(1);
3815 InFlag = Val.getValue(2);
3816 CopiedRegs[VA.getLocReg()] = Val;
3817 }
3818
3819 switch (VA.getLocInfo()) {
3820 default:
3821 llvm_unreachable("Unknown loc info!");
3822 case CCValAssign::Full:
3823 break;
3824 case CCValAssign::BCvt:
3825 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3826 break;
3827 case CCValAssign::AExtUpper:
3828 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
3829 DAG.getConstant(32, DL, VA.getLocVT()));
3830 LLVM_FALLTHROUGH;
3831 case CCValAssign::AExt:
3832 LLVM_FALLTHROUGH;
3833 case CCValAssign::ZExt:
3834 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
3835 break;
3836 }
3837
3838 InVals.push_back(Val);
3839 }
3840
3841 return Chain;
3842}
3843
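// Illustrative sketch, not part of this file: the AExtUpper case above
// recovers an i32 call result that was returned in the top half of a
// 64-bit location by shifting right 32 bits; the later getZExtOrTrunc
// performs the narrowing that the cast models here.
static unsigned sketchExtractUpperHalf(unsigned long long LocReg64) {
  // Bits [63:32] of the 64-bit location hold the value.
  return (unsigned)(LocReg64 >> 32);
}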
3844/// Return true if the calling convention is one that we can guarantee TCO for.
3845static bool canGuaranteeTCO(CallingConv::ID CC) {
3846 return CC == CallingConv::Fast;
3847}
3848
3849/// Return true if we might ever do TCO for calls with this calling convention.
3850static bool mayTailCallThisCC(CallingConv::ID CC) {
3851 switch (CC) {
3852 case CallingConv::C:
3853 case CallingConv::PreserveMost:
3854 case CallingConv::Swift:
3855 return true;
3856 default:
3857 return canGuaranteeTCO(CC);
3858 }
3859}
3860
3861bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3862 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3863 const SmallVectorImpl<ISD::OutputArg> &Outs,
3864 const SmallVectorImpl<SDValue> &OutVals,
3865 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3866 if (!mayTailCallThisCC(CalleeCC))
3867 return false;
3868
3869 MachineFunction &MF = DAG.getMachineFunction();
3870 const Function &CallerF = MF.getFunction();
3871 CallingConv::ID CallerCC = CallerF.getCallingConv();
3872 bool CCMatch = CallerCC == CalleeCC;
3873
3874 // Byval parameters hand the function a pointer directly into the stack area
3875 // we want to reuse during a tail call. Working around this *is* possible (see
3876 // X86) but less efficient and uglier in LowerCall.
3877 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3878 e = CallerF.arg_end();
3879 i != e; ++i) {
3880 if (i->hasByValAttr())
3881 return false;
3882
3883 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
3884 // In this case, it is necessary to save/restore X0 in the callee. Tail
3885 // call opt interferes with this. So we disable tail call opt when the
3886 // caller has an argument with "inreg" attribute.
3887
3888 // FIXME: Check whether the callee also has an "inreg" argument.
3889 if (i->hasInRegAttr())
3890 return false;
3891 }
3892
3893 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3894 return canGuaranteeTCO(CalleeCC) && CCMatch;
3895
3896 // Externally-defined functions with weak linkage should not be
3897 // tail-called on AArch64 when the OS does not support dynamic
3898 // pre-emption of symbols, as the AAELF spec requires normal calls
3899 // to undefined weak functions to be replaced with a NOP or jump to the
3900 // next instruction. The behaviour of branch instructions in this
3901 // situation (as used for tail calls) is implementation-defined, so we
3902 // cannot rely on the linker replacing the tail call with a return.
3903 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3904 const GlobalValue *GV = G->getGlobal();
3905 const Triple &TT = getTargetMachine().getTargetTriple();
3906 if (GV->hasExternalWeakLinkage() &&
3907 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3908 return false;
3909 }
3910
3911 // Now we search for cases where we can use a tail call without changing the
3912 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3913 // concept.
3914
3915 // I want anyone implementing a new calling convention to think long and hard
3916 // about this assert.
3917 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3918 "Unexpected variadic calling convention");
3919
3920 LLVMContext &C = *DAG.getContext();
3921 if (isVarArg && !Outs.empty()) {
3922 // At least two cases here: if caller is fastcc then we can't have any
3923 // memory arguments (we'd be expected to clean up the stack afterwards). If
3924 // caller is C then we could potentially use its argument area.
3925
3926 // FIXME: for now we take the most conservative of these in both cases:
3927 // disallow all variadic memory operands.
3928 SmallVector<CCValAssign, 16> ArgLocs;
3929 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3930
3931 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3932 for (const CCValAssign &ArgLoc : ArgLocs)
3933 if (!ArgLoc.isRegLoc())
3934 return false;
3935 }
3936
3937 // Check that the call results are passed in the same way.
3938 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3939 CCAssignFnForCall(CalleeCC, isVarArg),
3940 CCAssignFnForCall(CallerCC, isVarArg)))
3941 return false;
3942 // The callee has to preserve all registers the caller needs to preserve.
3943 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3944 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3945 if (!CCMatch) {
3946 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3947 if (Subtarget->hasCustomCallingConv()) {
3948 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3949 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3950 }
3951 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3952 return false;
3953 }
3954
3955 // Nothing more to check if the callee is taking no arguments
3956 if (Outs.empty())
3957 return true;
3958
3959 SmallVector<CCValAssign, 16> ArgLocs;
3960 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3961
3962 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3963
3964 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3965
3966 // If any of the arguments is passed indirectly, it must be SVE, so the
3967 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
3968 // allocate space on the stack. That is why we explicitly decide here that
3969 // such a call cannot be a tail call.
3970 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
3971 assert((A.getLocInfo() != CCValAssign::Indirect ||
3972 A.getValVT().isScalableVector()) &&
3973 "Expected value to be scalable");
3974 return A.getLocInfo() == CCValAssign::Indirect;
3975 }))
3976 return false;
3977
3978 // If the stack arguments for this call do not fit into our own save area then
3979 // the call cannot be made tail.
3980 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3981 return false;
3982
3983 const MachineRegisterInfo &MRI = MF.getRegInfo();
3984 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3985 return false;
3986
3987 return true;
3988}
3989
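// Illustrative sketch, not part of this file: the final stack check in
// isEligibleForTailCallOptimization only reuses the caller's own incoming
// argument area. Expressed on plain byte counts:
static bool sketchStackArgsFitForTailCall(unsigned CalleeStackBytes,
                                          unsigned CallerStackArgArea) {
  // A tail call reuses the caller's incoming argument space, so the
  // callee's stack-passed arguments must fit within that area.
  return CalleeStackBytes <= CallerStackArgArea;
}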
3990SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3991 SelectionDAG &DAG,
3992 MachineFrameInfo &MFI,
3993 int ClobberedFI) const {
3994 SmallVector<SDValue, 8> ArgChains;
3995 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3996 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3997
3998 // Include the original chain at the beginning of the list. When this is
3999 // used by target LowerCall hooks, this helps legalize find the
4000 // CALLSEQ_BEGIN node.
4001 ArgChains.push_back(Chain);
4002
4003 // Add a chain value for each stack argument whose slot overlaps the clobbered range.
4004 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
4005 UE = DAG.getEntryNode().getNode()->use_end();
4006 U != UE; ++U)
4007 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
4008 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
4009 if (FI->getIndex() < 0) {
4010 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
4011 int64_t InLastByte = InFirstByte;
4012 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
4013
4014 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
4015 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
4016 ArgChains.push_back(SDValue(L, 1));
4017 }
4018
4019 // Build a tokenfactor for all the chains.
4020 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
4021}
4022
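// Illustrative sketch, not part of this file: the interval test in
// addTokenForArgument keeps a load's chain only when the load's byte range
// overlaps the clobbered slot. An equivalent closed-interval overlap test
// on plain offsets:
static bool sketchRangesOverlap(long long AFirst, long long ALast,
                                long long BFirst, long long BLast) {
  // [AFirst, ALast] and [BFirst, BLast] overlap iff neither range lies
  // entirely before the other.
  return AFirst <= BLast && BFirst <= ALast;
}
// E.g. an incoming argument at bytes [16, 23] overlaps a clobbered slot at
// [20, 27], so its load must be chained before the outgoing store.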
4023bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
4024 bool TailCallOpt) const {
4025 return CallCC == CallingConv::Fast && TailCallOpt;
4026}
4027
4028/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
4029/// and add input and output parameter nodes.
4030SDValue
4031AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
4032 SmallVectorImpl<SDValue> &InVals) const {
4033 SelectionDAG &DAG = CLI.DAG;
4034 SDLoc &DL = CLI.DL;
4035 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
4036 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
4037 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
4038 SDValue Chain = CLI.Chain;
4039 SDValue Callee = CLI.Callee;
4040 bool &IsTailCall = CLI.IsTailCall;
4041 CallingConv::ID CallConv = CLI.CallConv;
4042 bool IsVarArg = CLI.IsVarArg;
4043
4044 MachineFunction &MF = DAG.getMachineFunction();
4045 MachineFunction::CallSiteInfo CSInfo;
4046 bool IsThisReturn = false;
4047
4048 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4049 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4050 bool IsSibCall = false;
4051
4052 if (IsTailCall) {
4053 // Check if it's really possible to do a tail call.
4054 IsTailCall = isEligibleForTailCallOptimization(
4055 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
4056 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
4057 report_fatal_error("failed to perform tail call elimination on a call "
4058 "site marked musttail");
4059
4060 // A sibling call is one where we're under the usual C ABI and not planning
4061 // to change that but can still do a tail call:
4062 if (!TailCallOpt && IsTailCall)
4063 IsSibCall = true;
4064
4065 if (IsTailCall)
4066 ++NumTailCalls;
4067 }
4068
4069 // Analyze operands of the call, assigning locations to each operand.
4070 SmallVector<CCValAssign, 16> ArgLocs;
4071 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
4072 *DAG.getContext());
4073
4074 if (IsVarArg) {
4075 // Handle fixed and variable vector arguments differently.
4076 // Variable vector arguments always go into memory.
4077 unsigned NumArgs = Outs.size();
4078
4079 for (unsigned i = 0; i != NumArgs; ++i) {
4080 MVT ArgVT = Outs[i].VT;
4081 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4082 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
4083 /*IsVarArg=*/ !Outs[i].IsFixed);
4084 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
4085 assert(!Res && "Call operand has unhandled type");
4086 (void)Res;
4087 }
4088 } else {
4089 // At this point, Outs[].VT may already be promoted to i32. To correctly
4090 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
4091 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
4092 // Since AnalyzeCallOperands uses Outs[].VT for both ValVT and LocVT, here
4093 // we use a special version of AnalyzeCallOperands to pass in ValVT and
4094 // LocVT.
4095 unsigned NumArgs = Outs.size();
4096 for (unsigned i = 0; i != NumArgs; ++i) {
4097 MVT ValVT = Outs[i].VT;
4098 // Get type of the original argument.
4099 EVT ActualVT = getValueType(DAG.getDataLayout(),
4100 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
4101 /*AllowUnknown*/ true);
4102 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
4103 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4104 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
4105 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
4106 ValVT = MVT::i8;
4107 else if (ActualMVT == MVT::i16)
4108 ValVT = MVT::i16;
4109
4110 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
4111 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
4112 assert(!Res && "Call operand has unhandled type");
4113 (void)Res;
4114 }
4115 }
4116
4117 // Get a count of how many bytes are to be pushed on the stack.
4118 unsigned NumBytes = CCInfo.getNextStackOffset();
4119
4120 if (IsSibCall) {
4121 // Since we're not changing the ABI to make this a tail call, the memory
4122 // operands are already available in the caller's incoming argument space.
4123 NumBytes = 0;
4124 }
4125
4126 // FPDiff is the byte offset of the call's argument area from the callee's.
4127 // Stores to callee stack arguments will be placed in FixedStackSlots offset
4128 // by this amount for a tail call. In a sibling call it must be 0 because the
4129 // caller will deallocate the entire stack and the callee still expects its
4130 // arguments to begin at SP+0. Completely unused for non-tail calls.
4131 int FPDiff = 0;
4132
4133 if (IsTailCall && !IsSibCall) {
4134 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
4135
4136 // Since callee will pop argument stack as a tail call, we must keep the
4137 // popped size 16-byte aligned.
4138 NumBytes = alignTo(NumBytes, 16);
4139
4140 // FPDiff will be negative if this tail call requires more space than we
4141 // would automatically have in our incoming argument space. Positive if we
4142 // can actually shrink the stack.
4143 FPDiff = NumReusableBytes - NumBytes;
4144
4145 // The stack pointer must be 16-byte aligned at all times it's used for a
4146 // memory operation, which in practice means at *all* times and in
4147 // particular across call boundaries. Therefore our own arguments started at
4148 // a 16-byte aligned SP and the delta applied for the tail call should
4149 // satisfy the same constraint.
4150 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
4151 }
4152
4153 // Adjust the stack pointer for the new arguments...
4154 // These operations are automatically eliminated by the prolog/epilog pass
4155 if (!IsSibCall)
4156 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
4157
4158 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
4159 getPointerTy(DAG.getDataLayout()));
4160
4161 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4162 SmallSet<unsigned, 8> RegsUsed;
4163 SmallVector<SDValue, 8> MemOpChains;
4164 auto PtrVT = getPointerTy(DAG.getDataLayout());
4165
4166 if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
4167 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
4168 for (const auto &F : Forwards) {
4169 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
4170 RegsToPass.emplace_back(F.PReg, Val);
4171 }
4172 }
4173
4174 // Walk the register/memloc assignments, inserting copies/loads.
4175 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4176 CCValAssign &VA = ArgLocs[i];
4177 SDValue Arg = OutVals[i];
4178 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4179
4180 // Promote the value if needed.
4181 switch (VA.getLocInfo()) {
4182 default:
4183 llvm_unreachable("Unknown loc info!");
4184 case CCValAssign::Full:
4185 break;
4186 case CCValAssign::SExt:
4187 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
4188 break;
4189 case CCValAssign::ZExt:
4190 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4191 break;
4192 case CCValAssign::AExt:
4193 if (Outs[i].ArgVT == MVT::i1) {
4194 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
4195 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4196 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
4197 }
4198 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4199 break;
4200 case CCValAssign::AExtUpper:
4201 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4202 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4203 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4204 DAG.getConstant(32, DL, VA.getLocVT()));
4205 break;
4206 case CCValAssign::BCvt:
4207 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
4208 break;
4209 case CCValAssign::Trunc:
4210 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4211 break;
4212 case CCValAssign::FPExt:
4213 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
4214 break;
4215 case CCValAssign::Indirect:
4216 assert(VA.getValVT().isScalableVector() &&
4217 "Only scalable vectors can be passed indirectly");
4218 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4219 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
4220 unsigned Align = DAG.getDataLayout().getPrefTypeAlignment(Ty);
4221 int FI = MFI.CreateStackObject(
4222 VA.getValVT().getStoreSize().getKnownMinSize(), Align, false);
4223 MFI.setStackID(FI, TargetStackID::SVEVector);
4224
4225 SDValue SpillSlot = DAG.getFrameIndex(
4226 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
4227 Chain = DAG.getStore(
4228 Chain, DL, Arg, SpillSlot,
4229 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4230 Arg = SpillSlot;
4231 break;
4232 }
4233
4234 if (VA.isRegLoc()) {
4235 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
4236 Outs[0].VT == MVT::i64) {
4237 assert(VA.getLocVT() == MVT::i64 &&
4238 "unexpected calling convention register assignment");
4239 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
4240 "unexpected use of 'returned'");
4241 IsThisReturn = true;
4242 }
4243 if (RegsUsed.count(VA.getLocReg())) {
4244 // If this register has already been used then we're trying to pack
4245 // parts of an [N x i32] into an X-register. The extension type will
4246 // take care of putting the two halves in the right place but we have to
4247 // combine them.
4248 SDValue &Bits =
4249 std::find_if(RegsToPass.begin(), RegsToPass.end(),
4250 [=](const std::pair<unsigned, SDValue> &Elt) {
4251 return Elt.first == VA.getLocReg();
4252 })
4253 ->second;
4254 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4255 // Call site info is used for function's parameter entry value
4256 // tracking. For now we track only simple cases when parameter
4257 // is transferred through whole register.
4258 CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(),
4259 [&VA](MachineFunction::ArgRegPair ArgReg) {
4260 return ArgReg.Reg == VA.getLocReg();
4261 }),
4262 CSInfo.end());
4263 } else {
4264 RegsToPass.emplace_back(VA.getLocReg(), Arg);
4265 RegsUsed.insert(VA.getLocReg());
4266 const TargetOptions &Options = DAG.getTarget().Options;
4267 if (Options.EnableDebugEntryValues)
4268 CSInfo.emplace_back(VA.getLocReg(), i);
4269 }
4270 } else {
4271 assert(VA.isMemLoc());
4272
4273 SDValue DstAddr;
4274 MachinePointerInfo DstInfo;
4275
4276 // FIXME: This works on big-endian for composite byvals, which are the
4277 // common case. It should also work for fundamental types.
4278 uint32_t BEAlign = 0;
4279 unsigned OpSize;
4280 if (VA.getLocInfo() == CCValAssign::Indirect)
4281 OpSize = VA.getLocVT().getSizeInBits();
4282 else
4283 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
4284 : VA.getValVT().getSizeInBits();
4285 OpSize = (OpSize + 7) / 8;
4286 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
4287 !Flags.isInConsecutiveRegs()) {
4288 if (OpSize < 8)
4289 BEAlign = 8 - OpSize;
4290 }
4291 unsigned LocMemOffset = VA.getLocMemOffset();
4292 int32_t Offset = LocMemOffset + BEAlign;
4293 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4294 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4295
4296 if (IsTailCall) {
4297 Offset = Offset + FPDiff;
4298 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4299
4300 DstAddr = DAG.getFrameIndex(FI, PtrVT);
4301 DstInfo =
4302 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
4303
4304 // Make sure any stack arguments overlapping with where we're storing
4305 // are loaded before this eventual operation. Otherwise they'll be
4306 // clobbered.
4307 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
4308 } else {
4309 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4310
4311 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4312 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
4313 LocMemOffset);
4314 }
4315
4316 if (Outs[i].Flags.isByVal()) {
4317 SDValue SizeNode =
4318 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
4319 SDValue Cpy = DAG.getMemcpy(
4320 Chain, DL, DstAddr, Arg, SizeNode,
4321 Outs[i].Flags.getNonZeroByValAlign(),
4322 /*isVol = */ false, /*AlwaysInline = */ false,
4323 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
4324
4325 MemOpChains.push_back(Cpy);
4326 } else {
4327 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
4328 // promoted to a legal register type i32, we should truncate Arg back to
4329 // i1/i8/i16.
4330 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
4331 VA.getValVT() == MVT::i16)
4332 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
4333
4334 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
4335 MemOpChains.push_back(Store);
4336 }
4337 }
4338 }
4339
4340 if (!MemOpChains.empty())
4341 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4342
4343 // Build a sequence of copy-to-reg nodes chained together with token chain
4344 // and flag operands which copy the outgoing args into the appropriate regs.
4345 SDValue InFlag;
4346 for (auto &RegToPass : RegsToPass) {
4347 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
4348 RegToPass.second, InFlag);
4349 InFlag = Chain.getValue(1);
4350 }
4351
4352 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
4353 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
4354 // node so that legalize doesn't hack it.
4355 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4356 auto GV = G->getGlobal();
4357 unsigned OpFlags =
4358 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
4359 if (OpFlags & AArch64II::MO_GOT) {
4360 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
4361 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4362 } else {
4363 const GlobalValue *GV = G->getGlobal();
4364 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
4365 }
4366 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4367 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4368 Subtarget->isTargetMachO()) {
4369 const char *Sym = S->getSymbol();
4370 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
4371 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4372 } else {
4373 const char *Sym = S->getSymbol();
4374 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
4375 }
4376 }
4377
4378 // We don't usually want to end the call-sequence here because we would tidy
4379 // the frame up *after* the call, however in the ABI-changing tail-call case
4380 // we've carefully laid out the parameters so that when sp is reset they'll be
4381 // in the correct location.
4382 if (IsTailCall && !IsSibCall) {
4383 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4384 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
4385 InFlag = Chain.getValue(1);
4386 }
4387
4388 std::vector<SDValue> Ops;
4389 Ops.push_back(Chain);
4390 Ops.push_back(Callee);
4391
4392 if (IsTailCall) {
4393 // Each tail call may have to adjust the stack by a different amount, so
4394 // this information must travel along with the operation for eventual
4395 // consumption by emitEpilogue.
4396 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
4397 }
4398
4399 // Add argument registers to the end of the list so that they are known live
4400 // into the call.
4401 for (auto &RegToPass : RegsToPass)
4402 Ops.push_back(DAG.getRegister(RegToPass.first,
4403 RegToPass.second.getValueType()));
4404
4405 // Check callee args/returns for SVE registers and set calling convention
4406 // accordingly.
4407 if (CallConv == CallingConv::C) {
4408 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
4409 return Out.VT.isScalableVector();
4410 });
4411 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
4412 return In.VT.isScalableVector();
4413 });
4414
4415 if (CalleeInSVE || CalleeOutSVE)
4416 CallConv = CallingConv::AArch64_SVE_VectorCall;
4417 }
4418
4419 // Add a register mask operand representing the call-preserved registers.
4420 const uint32_t *Mask;
4421 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4422 if (IsThisReturn) {
4423 // For 'this' returns, use the X0-preserving mask if applicable
4424 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
4425 if (!Mask) {
4426 IsThisReturn = false;
4427 Mask = TRI->getCallPreservedMask(MF, CallConv);
4428 }
4429 } else
4430 Mask = TRI->getCallPreservedMask(MF, CallConv);
4431
4432 if (Subtarget->hasCustomCallingConv())
4433 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
4434
4435 if (TRI->isAnyArgRegReserved(MF))
4436 TRI->emitReservedArgRegCallError(MF);
4437
4438 assert(Mask && "Missing call preserved mask for calling convention");
4439 Ops.push_back(DAG.getRegisterMask(Mask));
4440
4441 if (InFlag.getNode())
4442 Ops.push_back(InFlag);
4443
4444 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4445
4446 // If we're doing a tail call, use a TC_RETURN here rather than an
4447 // actual call instruction.
4448 if (IsTailCall) {
4449 MF.getFrameInfo().setHasTailCall();
4450 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
4451 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4452 return Ret;
4453 }
4454
4455 // Returns a chain and a flag for retval copy to use.
4456 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
4457 InFlag = Chain.getValue(1);
4458 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4459
4460 uint64_t CalleePopBytes =
4461 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
4462
4463 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4464 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
4465 InFlag, DL);
4466 if (!Ins.empty())
4467 InFlag = Chain.getValue(1);
4468
4469 // Handle result values, copying them out of physregs into vregs that we
4470 // return.
4471 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
4472 InVals, IsThisReturn,
4473 IsThisReturn ? OutVals[0] : SDValue());
4474}
4475
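// Illustrative sketch, not part of this file: for a non-sibling tail call,
// FPDiff above is the caller's reusable argument area minus the callee's
// 16-byte-aligned outgoing argument size. On plain integers:
static int sketchComputeFPDiff(unsigned NumReusableBytes, unsigned NumBytes) {
  // Round the outgoing argument size up to 16 bytes, since the callee pops
  // its argument area and SP must stay 16-byte aligned.
  unsigned AlignedBytes = (NumBytes + 15u) & ~15u;
  // Negative when the tail call needs more stack than the caller's own
  // incoming argument area provides; positive when the stack can shrink.
  return (int)NumReusableBytes - (int)AlignedBytes;
}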
4476bool AArch64TargetLowering::CanLowerReturn(
4477 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
4478 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4479 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4480 ? RetCC_AArch64_WebKit_JS
4481 : RetCC_AArch64_AAPCS;
4482 SmallVector<CCValAssign, 16> RVLocs;
4483 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
4484 return CCInfo.CheckReturn(Outs, RetCC);
4485}
4486
4487SDValue
4488AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
4489 bool isVarArg,
4490 const SmallVectorImpl<ISD::OutputArg> &Outs,
4491 const SmallVectorImpl<SDValue> &OutVals,
4492 const SDLoc &DL, SelectionDAG &DAG) const {
4493 auto &MF = DAG.getMachineFunction();
4494 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4495
4496 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4497 ? RetCC_AArch64_WebKit_JS
4498 : RetCC_AArch64_AAPCS;
4499 SmallVector<CCValAssign, 16> RVLocs;
4500 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4501 *DAG.getContext());
4502 CCInfo.AnalyzeReturn(Outs, RetCC);
4503
4504 // Copy the result values into the output registers.
4505 SDValue Flag;
4506 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
4507 SmallSet<unsigned, 4> RegsUsed;
4508 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
4509 ++i, ++realRVLocIdx) {
4510 CCValAssign &VA = RVLocs[i];
4511 assert(VA.isRegLoc() && "Can only return in registers!");
4512 SDValue Arg = OutVals[realRVLocIdx];
4513
4514 switch (VA.getLocInfo()) {
4515 default:
4516 llvm_unreachable("Unknown loc info!");
4517 case CCValAssign::Full:
4518 if (Outs[i].ArgVT == MVT::i1) {
4519 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
4520 // value. This is strictly redundant on Darwin (which uses "zeroext
4521 // i1"), but will be optimised out before ISel.
4522 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4523 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4524 }
4525 break;
4526 case CCValAssign::BCvt:
4527 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
4528 break;
4529 case CCValAssign::AExt:
4530 case CCValAssign::ZExt:
4531 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4532 break;
4533 case CCValAssign::AExtUpper:
4534 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4535 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4536 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4537 DAG.getConstant(32, DL, VA.getLocVT()));
4538 break;
4539 }
4540
4541 if (RegsUsed.count(VA.getLocReg())) {
4542 SDValue &Bits =
4543 std::find_if(RetVals.begin(), RetVals.end(),
4544 [=](const std::pair<unsigned, SDValue> &Elt) {
4545 return Elt.first == VA.getLocReg();
4546 })
4547 ->second;
4548 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4549 } else {
4550 RetVals.emplace_back(VA.getLocReg(), Arg);
4551 RegsUsed.insert(VA.getLocReg());
4552 }
4553 }
4554
4555 SmallVector<SDValue, 4> RetOps(1, Chain);
4556 for (auto &RetVal : RetVals) {
4557 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
4558 Flag = Chain.getValue(1);
4559 RetOps.push_back(
4560 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
4561 }
4562
4563 // Windows AArch64 ABIs require that for returning structs by value we copy
4564 // the sret argument into X0 for the return.
4565 // We saved the argument into a virtual register in the entry block,
4566 // so now we copy the value out and into X0.
4567 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
4568 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
4569 getPointerTy(MF.getDataLayout()));
4570
4571 unsigned RetValReg = AArch64::X0;
4572 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
4573 Flag = Chain.getValue(1);
4574
4575 RetOps.push_back(
4576 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
4577 }
4578
4579 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4580 const MCPhysReg *I =
4581 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
4582 if (I) {
4583 for (; *I; ++I) {
4584 if (AArch64::GPR64RegClass.contains(*I))
4585 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
4586 else if (AArch64::FPR64RegClass.contains(*I))
4587 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
4588 else
4589 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
4590 }
4591 }
4592
4593 RetOps[0] = Chain; // Update chain.
4594
4595 // Add the flag if we have it.
4596 if (Flag.getNode())
4597 RetOps.push_back(Flag);
4598
4599 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
4600}
4601
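// Illustrative sketch, not part of this file: when two i32 pieces of an
// [N x i32] share one X register (the AExtUpper plus OR combining above),
// the upper piece is shifted into bits [63:32] and OR-ed with the piece
// already assigned to that register. On plain 64-bit integers:
static unsigned long long sketchPackHalves(unsigned Lo, unsigned Hi) {
  return ((unsigned long long)Hi << 32) | (unsigned long long)Lo;
}
// E.g. sketchPackHalves(0x11111111u, 0x22222222u) == 0x2222222211111111.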
4602//===----------------------------------------------------------------------===//
4603// Other Lowering Code
4604//===----------------------------------------------------------------------===//
4605
4606SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
4607 SelectionDAG &DAG,
4608 unsigned Flag) const {
4609 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
4610 N->getOffset(), Flag);
4611}
4612
4613SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
4614 SelectionDAG &DAG,
4615 unsigned Flag) const {
4616 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
4617}
4618
4619SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
4620 SelectionDAG &DAG,
4621 unsigned Flag) const {
4622 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
4623 N->getOffset(), Flag);
4624}
4625
4626SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
4627 SelectionDAG &DAG,
4628 unsigned Flag) const {
4629 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
4630}
4631
4632// (loadGOT sym)
4633template <class NodeTy>
4634SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
4635 unsigned Flags) const {
4636 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
4637 SDLoc DL(N);
4638 EVT Ty = getPointerTy(DAG.getDataLayout());
4639 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
4640 // FIXME: Once remat is capable of dealing with instructions with register
4641 // operands, expand this into two nodes instead of using a wrapper node.
4642 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
4643}
4644
4645// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
4646template <class NodeTy>
4647SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
4648 unsigned Flags) const {
4649 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4650 SDLoc DL(N);
4651 EVT Ty = getPointerTy(DAG.getDataLayout());
4652 const unsigned char MO_NC = AArch64II::MO_NC;
4653 return DAG.getNode(
4654 AArch64ISD::WrapperLarge, DL, Ty,
4655 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4656 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4657 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4658 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4659}
4660
4661// (addlow (adrp %hi(sym)) %lo(sym))
4662template <class NodeTy>
4663SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4664 unsigned Flags) const {
4665 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4666 SDLoc DL(N);
4667 EVT Ty = getPointerTy(DAG.getDataLayout());
4668 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4669 SDValue Lo = getTargetNode(N, Ty, DAG,
4670 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4671 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4672 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4673}
4674
4675// (adr sym)
4676template <class NodeTy>
4677SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
4678 unsigned Flags) const {
4679 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4680 SDLoc DL(N);
4681 EVT Ty = getPointerTy(DAG.getDataLayout());
4682 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4683 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4684}
4685
4686SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4687 SelectionDAG &DAG) const {
4688 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4689 const GlobalValue *GV = GN->getGlobal();
4690 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4691
4692 if (OpFlags != AArch64II::MO_NO_FLAG)
4693 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4694 "unexpected offset in global node");
4695
4696 // This also catches the large code model case for Darwin, and tiny code
4697 // model with got relocations.
4698 if ((OpFlags & AArch64II::MO_GOT) != 0) {
4699 return getGOT(GN, DAG, OpFlags);
4700 }
4701
4702 SDValue Result;
4703 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4704 Result = getAddrLarge(GN, DAG, OpFlags);
4705 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4706 Result = getAddrTiny(GN, DAG, OpFlags);
4707 } else {
4708 Result = getAddr(GN, DAG, OpFlags);
4709 }
4710 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4711 SDLoc DL(GN);
4712 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
4713 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4714 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4715 return Result;
4716}
4717
4718/// Convert a TLS address reference into the correct sequence of loads
4719/// and calls to compute the variable's address (for Darwin, currently) and
4720/// return an SDValue containing the final node.
4721
4722/// Darwin only has one TLS scheme which must be capable of dealing with the
4723/// fully general situation, in the worst case. This means:
4724/// + "extern __thread" declaration.
4725/// + Defined in a possibly unknown dynamic library.
4726///
4727/// The general system is that each __thread variable has a [3 x i64] descriptor
4728/// which contains information used by the runtime to calculate the address. The
4729/// only part of this the compiler needs to know about is the first xword, which
4730/// contains a function pointer that must be called with the address of the
4731/// entire descriptor in "x0".
4732///
4733/// Since this descriptor may be in a different unit, in general even the
4734/// descriptor must be accessed via an indirect load. The "ideal" code sequence
4735/// is:
4736/// adrp x0, _var@TLVPPAGE
4737/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
4738/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
4739/// ; the function pointer
4740/// blr x1 ; Uses descriptor address in x0
4741/// ; Address of _var is now in x0.
4742///
4743/// If the address of _var's descriptor *is* known to the linker, then it can
4744/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4745/// a slight efficiency gain.
4746SDValue
4747AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4748 SelectionDAG &DAG) const {
4749 assert(Subtarget->isTargetDarwin() &&
4750 "This function expects a Darwin target");
4751
4752 SDLoc DL(Op);
4753 MVT PtrVT = getPointerTy(DAG.getDataLayout());
4754 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
4755 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4756
4757 SDValue TLVPAddr =
4758 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4759 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4760
4761 // The first entry in the descriptor is a function pointer that we must call
4762 // to obtain the address of the variable.
4763 SDValue Chain = DAG.getEntryNode();
4764 SDValue FuncTLVGet = DAG.getLoad(
4765 PtrMemVT, DL, Chain, DescAddr,
4766 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4767 /* Alignment = */ PtrMemVT.getSizeInBits() / 8,
4768 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
4769 Chain = FuncTLVGet.getValue(1);
4770
4771 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
4772 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
4773
4774 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4775 MFI.setAdjustsStack(true);
4776
4777 // TLS calls preserve all registers except those that absolutely must be
4778 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4779 // silly).
4780 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4781 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4782 if (Subtarget->hasCustomCallingConv())
4783 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4784
4785 // Finally, we can make the call. This is just a degenerate version of a
4786 // normal AArch64 call node: x0 takes the address of the descriptor, and
4787 // returns the address of the variable in this thread.
4788 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4789 Chain =
4790 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4791 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4792 DAG.getRegisterMask(Mask), Chain.getValue(1));
4793 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4794}
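
As an editorial illustration (not part of the analyzed source), a minimal C++ sketch of the kind of access that takes this Darwin path; the variable and function names are invented, and the commented instructions simply restate the descriptor sequence documented above:

  extern __thread int tls_var;        // possibly defined in an unknown dynamic library

  int readDarwinTLSVar() {
    // Expected lowering, per the descriptor scheme described above:
    //   adrp x0, _tls_var@TLVPPAGE
    //   ldr  x0, [x0, _tls_var@TLVPPAGEOFF]   ; x0 = address of descriptor
    //   ldr  x1, [x0]                         ; x1 = resolver function pointer
    //   blr  x1                               ; x0 = address of tls_var
    return tls_var;
  }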
4795
4796/// Convert a thread-local variable reference into a sequence of instructions to
4797/// compute the variable's address for the local exec TLS model of ELF targets.
4798/// The sequence depends on the maximum TLS area size.
4799SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
4800 SDValue ThreadBase,
4801 const SDLoc &DL,
4802 SelectionDAG &DAG) const {
4803 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4804 SDValue TPOff, Addr;
4805
4806 switch (DAG.getTarget().Options.TLSSize) {
4807 default:
4808 llvm_unreachable("Unexpected TLS size");
4809
4810 case 12: {
4811 // mrs x0, TPIDR_EL0
4812 // add x0, x0, :tprel_lo12:a
4813 SDValue Var = DAG.getTargetGlobalAddress(
4814 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
4815 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4816 Var,
4817 DAG.getTargetConstant(0, DL, MVT::i32)),
4818 0);
4819 }
4820
4821 case 24: {
4822 // mrs x0, TPIDR_EL0
4823 // add x0, x0, :tprel_hi12:a
4824 // add x0, x0, :tprel_lo12_nc:a
4825 SDValue HiVar = DAG.getTargetGlobalAddress(
4826 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4827 SDValue LoVar = DAG.getTargetGlobalAddress(
4828 GV, DL, PtrVT, 0,
4829 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4830 Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4831 HiVar,
4832 DAG.getTargetConstant(0, DL, MVT::i32)),
4833 0);
4834 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
4835 LoVar,
4836 DAG.getTargetConstant(0, DL, MVT::i32)),
4837 0);
4838 }
4839
4840 case 32: {
4841 // mrs x1, TPIDR_EL0
4842 // movz x0, #:tprel_g1:a
4843 // movk x0, #:tprel_g0_nc:a
4844 // add x0, x1, x0
4845 SDValue HiVar = DAG.getTargetGlobalAddress(
4846 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
4847 SDValue LoVar = DAG.getTargetGlobalAddress(
4848 GV, DL, PtrVT, 0,
4849 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
4850 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
4851 DAG.getTargetConstant(16, DL, MVT::i32)),
4852 0);
4853 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
4854 DAG.getTargetConstant(0, DL, MVT::i32)),
4855 0);
4856 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4857 }
4858
4859 case 48: {
4860 // mrs x1, TPIDR_EL0
4861 // movz x0, #:tprel_g2:a
4862 // movk x0, #:tprel_g1_nc:a
4863 // movk x0, #:tprel_g0_nc:a
4864 // add x0, x1, x0
4865 SDValue HiVar = DAG.getTargetGlobalAddress(
4866 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
4867 SDValue MiVar = DAG.getTargetGlobalAddress(
4868 GV, DL, PtrVT, 0,
4869 AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
4870 SDValue LoVar = DAG.getTargetGlobalAddress(
4871 GV, DL, PtrVT, 0,
4872 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
4873 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
4874 DAG.getTargetConstant(32, DL, MVT::i32)),
4875 0);
4876 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
4877 DAG.getTargetConstant(16, DL, MVT::i32)),
4878 0);
4879 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
4880 DAG.getTargetConstant(0, DL, MVT::i32)),
4881 0);
4882 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4883 }
4884 }
4885}
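
To make the size cases concrete, a hedged sketch of a local-exec access under the default 24-bit TLS size; the names are invented and the commented instructions restate the case 24 comments above:

  static __thread int counter;        // local-exec candidate (defined in this module)

  int bumpCounter() {
    // Expected lowering for the 24-bit case:
    //   mrs  x8, TPIDR_EL0
    //   add  x8, x8, :tprel_hi12:counter
    //   add  x8, x8, :tprel_lo12_nc:counter
    return ++counter;
  }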
4886
4887/// When accessing thread-local variables under either the general-dynamic or
4888/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4889/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4890/// is a function pointer to carry out the resolution.
4891///
4892/// The sequence is:
4893/// adrp x0, :tlsdesc:var
4894/// ldr x1, [x0, #:tlsdesc_lo12:var]
4895/// add x0, x0, #:tlsdesc_lo12:var
4896/// .tlsdesccall var
4897/// blr x1
4898/// (TPIDR_EL0 offset now in x0)
4899///
4900/// The above sequence must be produced unscheduled, so that the linker can
4901/// optimize/relax it.
4902/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4903/// whole sequence; it is expanded very late in the compilation flow, to ensure
4904/// the sequence is emitted exactly as shown above.
4905SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4906 const SDLoc &DL,
4907 SelectionDAG &DAG) const {
4908 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4909
4910 SDValue Chain = DAG.getEntryNode();
4911 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4912
4913 Chain =
4914 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4915 SDValue Glue = Chain.getValue(1);
4916
4917 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4918}
4919
4920SDValue
4921AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4922 SelectionDAG &DAG) const {
4923 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4924
4925 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4926
4927 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4928
4929 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4930 if (Model == TLSModel::LocalDynamic)
4931 Model = TLSModel::GeneralDynamic;
4932 }
4933
4934 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4935 Model != TLSModel::LocalExec)
4936 report_fatal_error("ELF TLS only supported in small memory model or "
4937 "in local exec TLS model");
4938 // Different choices can be made for the maximum size of the TLS area for a
4939 // module. For the small address model, the default TLS size is 16MiB and the
4940 // maximum TLS size is 4GiB.
4941 // FIXME: add tiny and large code model support for TLS access models other
4942 // than local exec. We currently generate the same code as small for tiny,
4943 // which may be larger than needed.
4944
4945 SDValue TPOff;
4946 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4947 SDLoc DL(Op);
4948 const GlobalValue *GV = GA->getGlobal();
4949
4950 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4951
4952 if (Model == TLSModel::LocalExec) {
4953 return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
4954 } else if (Model == TLSModel::InitialExec) {
4955 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4956 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4957 } else if (Model == TLSModel::LocalDynamic) {
4958 // Local-dynamic accesses proceed in two phases: first, a general-dynamic
4959 // TLS descriptor call against the special symbol _TLS_MODULE_BASE_
4960 // calculates the beginning of the module's TLS region; then a DTPREL
4961 // offset calculation locates the variable within that region.
4962
4963 // These accesses will need deduplicating if there's more than one.
4964 AArch64FunctionInfo *MFI =
4965 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4966 MFI->incNumLocalDynamicTLSAccesses();
4967
4968 // The call needs a relocation too for linker relaxation. It doesn't make
4969 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4970 // the address.
4971 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4972 AArch64II::MO_TLS);
4973
4974 // Now we can calculate the offset from TPIDR_EL0 to this module's
4975 // thread-local area.
4976 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4977
4978 // Now use :dtprel_whatever: operations to calculate this variable's offset
4979 // in its thread-storage area.
4980 SDValue HiVar = DAG.getTargetGlobalAddress(
4981 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4982 SDValue LoVar = DAG.getTargetGlobalAddress(
4983 GV, DL, MVT::i64, 0,
4984 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4985
4986 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4987 DAG.getTargetConstant(0, DL, MVT::i32)),
4988 0);
4989 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4990 DAG.getTargetConstant(0, DL, MVT::i32)),
4991 0);
4992 } else if (Model == TLSModel::GeneralDynamic) {
4993 // The call needs a relocation too for linker relaxation. It doesn't make
4994 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4995 // the address.
4996 SDValue SymAddr =
4997 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4998
4999 // Finally we can make a call to calculate the offset from tpidr_el0.
5000 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
5001 } else
5002 llvm_unreachable("Unsupported ELF TLS access model");
5003
5004 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
5005}
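
A similarly hedged sketch of a general-dynamic access that reaches the TLSDESC_CALLSEQ path above; names are invented, and the commented sequence restates the tlsdesc form documented before LowerELFTLSDescCallSeq:

  extern __thread long shared_counter;  // definition may live in any shared object

  long readSharedCounter() {
    // Expected lowering via the TLS descriptor call:
    //   adrp x0, :tlsdesc:shared_counter
    //   ldr  x1, [x0, #:tlsdesc_lo12:shared_counter]
    //   add  x0, x0, #:tlsdesc_lo12:shared_counter
    //   .tlsdesccall shared_counter
    //   blr  x1                          ; TPIDR_EL0 offset now in x0
    return shared_counter;
  }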
5006
5007SDValue
5008AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
5009 SelectionDAG &DAG) const {
5010 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
5011
5012 SDValue Chain = DAG.getEntryNode();
5013 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5014 SDLoc DL(Op);
5015
5016 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
5017
5018 // Load the ThreadLocalStoragePointer from the TEB
5019 // A pointer to the TLS array is located at offset 0x58 from the TEB.
5020 SDValue TLSArray =
5021 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
5022 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
5023 Chain = TLSArray.getValue(1);
5024
5025 // Load the TLS index from the C runtime.
5026 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
5027 // This also does the same as LOADgot, but using a generic i32 load,
5028 // while LOADgot only loads i64.
5029 SDValue TLSIndexHi =
5030 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
5031 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
5032 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
5033 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
5034 SDValue TLSIndex =
5035 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
5036 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
5037 Chain = TLSIndex.getValue(1);
5038
5039 // The pointer to the thread's TLS data area lives in the TLS array, at the
5040 // slot given by the TLS index scaled by 8 (the pointer size).
5041 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
5042 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
5043 DAG.getConstant(3, DL, PtrVT));
5044 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
5045 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
5046 MachinePointerInfo());
5047 Chain = TLS.getValue(1);
5048
5049 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
5050 const GlobalValue *GV = GA->getGlobal();
5051 SDValue TGAHi = DAG.getTargetGlobalAddress(
5052 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
5053 SDValue TGALo = DAG.getTargetGlobalAddress(
5054 GV, DL, PtrVT, 0,
5055 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
5056
5057 // Add the offset from the start of the .tls section (section base).
5058 SDValue Addr =
5059 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
5060 DAG.getTargetConstant(0, DL, MVT::i32)),
5061 0);
5062 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
5063 return Addr;
5064}
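
The pointer chase performed above can be summarized in the following editorial pseudo-code; the 0x58 offset, the x18 register and the _tls_index symbol come from the code, while the remaining names are purely illustrative:

  //   teb       = x18                               ; Thread Environment Block
  //   tls_array = *(void **)(teb + 0x58)            ; ThreadLocalStoragePointer
  //   tls_base  = ((char **)tls_array)[_tls_index]  ; this module's TLS block
  //   address   = tls_base + <hi12/lo12 offset of the variable within .tls>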
5065
5066SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
5067 SelectionDAG &DAG) const {
5068 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
5069 if (DAG.getTarget().useEmulatedTLS())
5070 return LowerToTLSEmulatedModel(GA, DAG);
5071
5072 if (Subtarget->isTargetDarwin())
5073 return LowerDarwinGlobalTLSAddress(Op, DAG);
5074 if (Subtarget->isTargetELF())
5075 return LowerELFGlobalTLSAddress(Op, DAG);
5076 if (Subtarget->isTargetWindows())
5077 return LowerWindowsGlobalTLSAddress(Op, DAG);
5078
5079 llvm_unreachable("Unexpected platform trying to use TLS");
5080}
5081
5082SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5083 SDValue Chain = Op.getOperand(0);
5084 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5085 SDValue LHS = Op.getOperand(2);
5086 SDValue RHS = Op.getOperand(3);
5087 SDValue Dest = Op.getOperand(4);
5088 SDLoc dl(Op);
5089
5090 MachineFunction &MF = DAG.getMachineFunction();
5091 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
5092 // will not be produced, as they are conditional branch instructions that do
5093 // not set flags.
5094 bool ProduceNonFlagSettingCondBr =
5095 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
5096
5097 // Handle f128 first, since lowering it will result in comparing the return
5098 // value of a libcall against zero, which is just what the rest of LowerBR_CC
5099 // is expecting to deal with.
5100 if (LHS.getValueType() == MVT::f128) {
5101 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5102
5103 // If softenSetCCOperands returned a scalar, we need to compare the result
5104 // against zero to select between true and false values.
5105 if (!RHS.getNode()) {
5106 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5107 CC = ISD::SETNE;
5108 }
5109 }
5110
5111 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5112 // instruction.
5113 if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
5114 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5115 // Only lower legal XALUO ops.
5116 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5117 return SDValue();
5118
5119 // The actual operation with overflow check.
5120 AArch64CC::CondCode OFCC;
5121 SDValue Value, Overflow;
5122 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
5123
5124 if (CC == ISD::SETNE)
5125 OFCC = getInvertedCondCode(OFCC);
5126 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
5127
5128 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
5129 Overflow);
5130 }
5131
5132 if (LHS.getValueType().isInteger()) {
5133 assert((LHS.getValueType() == RHS.getValueType()) &&
5134        (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
5135
5136 // If the RHS of the comparison is zero, we can potentially fold this
5137 // to a specialized branch.
5138 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
5139 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
5140 if (CC == ISD::SETEQ) {
5141 // See if we can use a TBZ to fold in an AND as well.
5142 // TBZ has a smaller branch displacement than CBZ. If the offset is
5143 // out of bounds, a late MI-layer pass rewrites branches.
5144 // 403.gcc is an example that hits this case.
5145 if (LHS.getOpcode() == ISD::AND &&
5146 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5147 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
5148 SDValue Test = LHS.getOperand(0);
5149 uint64_t Mask = LHS.getConstantOperandVal(1);
5150 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
5151 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
5152 Dest);
5153 }
5154
5155 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
5156 } else if (CC == ISD::SETNE) {
5157 // See if we can use a TBZ to fold in an AND as well.
5158 // TBZ has a smaller branch displacement than CBZ. If the offset is
5159 // out of bounds, a late MI-layer pass rewrites branches.
5160 // 403.gcc is an example that hits this case.
5161 if (LHS.getOpcode() == ISD::AND &&
5162 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5163 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
5164 SDValue Test = LHS.getOperand(0);
5165 uint64_t Mask = LHS.getConstantOperandVal(1);
5166 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
5167 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
5168 Dest);
5169 }
5170
5171 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
5172 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
5173 // Don't combine AND since emitComparison converts the AND to an ANDS
5174 // (a.k.a. TST) and the test in the test bit and branch instruction
5175 // becomes redundant. This would also increase register pressure.
5176 uint64_t Mask = LHS.getValueSizeInBits() - 1;
5177 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
5178 DAG.getConstant(Mask, dl, MVT::i64), Dest);
5179 }
5180 }
5181 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
5182 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
5183 // Don't combine AND since emitComparison converts the AND to an ANDS
5184 // (a.k.a. TST) and the test in the test bit and branch instruction
5185 // becomes redundant. This would also increase register pressure.
5186 uint64_t Mask = LHS.getValueSizeInBits() - 1;
5187 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
5188 DAG.getConstant(Mask, dl, MVT::i64), Dest);
5189 }
5190
5191 SDValue CCVal;
5192 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
5193 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
5194 Cmp);
5195 }
5196
5197 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5198        LHS.getValueType() == MVT::f64);
5199
5200 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
5201 // clean. Some of them require two branches to implement.
5202 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5203 AArch64CC::CondCode CC1, CC2;
5204 changeFPCCToAArch64CC(CC, CC1, CC2);
5205 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5206 SDValue BR1 =
5207 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
5208 if (CC2 != AArch64CC::AL) {
5209 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5210 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
5211 Cmp);
5212 }
5213
5214 return BR1;
5215}
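
As a concrete (editorial) example of the TBZ/TBNZ fold above, where an AND with a power-of-two mask feeds a compare-against-zero branch; the names are invented and the expected instruction is only indicative:

  void setIfBitThree(unsigned long x, volatile int *flag) {
    // (x & 8) with a SETNE-against-zero branch matches the pattern above, so the
    // compare-and-branch can be emitted as a single  tbnz x0, #3, <label>
    // instead of an and followed by cbnz.
    if (x & 8)
      *flag = 1;
  }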
5216
5217SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
5218 SelectionDAG &DAG) const {
5219 EVT VT = Op.getValueType();
5220 SDLoc DL(Op);
5221
5222 SDValue In1 = Op.getOperand(0);
5223 SDValue In2 = Op.getOperand(1);
5224 EVT SrcVT = In2.getValueType();
5225
5226 if (SrcVT.bitsLT(VT))
5227 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
5228 else if (SrcVT.bitsGT(VT))
5229 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
5230
5231 EVT VecVT;
5232 uint64_t EltMask;
5233 SDValue VecVal1, VecVal2;
5234
5235 auto setVecVal = [&] (int Idx) {
5236 if (!VT.isVector()) {
5237 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
5238 DAG.getUNDEF(VecVT), In1);
5239 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
5240 DAG.getUNDEF(VecVT), In2);
5241 } else {
5242 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
5243 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
5244 }
5245 };
5246
5247 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
5248 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
5249 EltMask = 0x80000000ULL;
5250 setVecVal(AArch64::ssub);
5251 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
5252 VecVT = MVT::v2i64;
5253
5254 // We want to materialize a mask with the high bit set, but the AdvSIMD
5255 // immediate moves cannot materialize that in a single instruction for
5256 // 64-bit elements. Instead, materialize zero and then negate it.
5257 EltMask = 0;
5258
5259 setVecVal(AArch64::dsub);
5260 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
5261 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
5262 EltMask = 0x8000ULL;
5263 setVecVal(AArch64::hsub);
5264 } else {
5265 llvm_unreachable("Invalid type for copysign!");
5266 }
5267
5268 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
5269
5270 // If we couldn't materialize the mask above, then the mask vector will be
5271 // the zero vector, and we need to negate it here.
5272 if (VT == MVT::f64 || VT == MVT::v2f64) {
5273 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
5274 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
5275 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
5276 }
5277
5278 SDValue Sel =
5279 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
5280
5281 if (VT == MVT::f16)
5282 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
5283 if (VT == MVT::f32)
5284 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
5285 else if (VT == MVT::f64)
5286 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
5287 else
5288 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
5289}
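
A brief, hedged example of the f64 mask trick above; the function name is invented and __builtin_copysign is the usual Clang/GCC builtin:

  double copySignExample(double mag, double sgn) {
    // For f64 the 0x8000000000000000 sign-bit mask cannot be produced by a single
    // AdvSIMD immediate move, so the lowering above materializes 0.0 and negates
    // it to -0.0 (whose bit pattern is exactly the sign mask) before the BIT
    // (bitwise insert if true) selection of the sign bit from sgn into mag.
    return __builtin_copysign(mag, sgn);
  }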
5290
5291SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
5292 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
5293 Attribute::NoImplicitFloat))
5294 return SDValue();
5295
5296 if (!Subtarget->hasNEON())
5297 return SDValue();
5298
5299 // While there is no integer popcount instruction, CTPOP can
5300 // be lowered more efficiently to the following sequence, which uses
5301 // AdvSIMD registers/instructions, as long as the copies to/from
5302 // the AdvSIMD registers are cheap.
5303 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
5304 // CNT V0.8B, V0.8B // 8xbyte pop-counts
5305 // ADDV B0, V0.8B // sum 8xbyte pop-counts
5306 // UMOV X0, V0.B[0] // copy byte result back to integer reg
5307 SDValue Val = Op.getOperand(0);
5308 SDLoc DL(Op);
5309 EVT VT = Op.getValueType();
5310
5311 if (VT == MVT::i32 || VT == MVT::i64) {
5312 if (VT == MVT::i32)
5313 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
5314 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
5315
5316 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
5317 SDValue UaddLV = DAG.getNode(
5318 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
5319 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
5320
5321 if (VT == MVT::i64)
5322 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
5323 return UaddLV;
5324 }
5325
5326 assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
5327         VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
5328        "Unexpected type for custom ctpop lowering");
5329
5330 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5331 Val = DAG.getBitcast(VT8Bit, Val);
5332 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
5333
5334 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
5335 unsigned EltSize = 8;
5336 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
5337 while (EltSize != VT.getScalarSizeInBits()) {
5338 EltSize *= 2;
5339 NumElts /= 2;
5340 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
5341 Val = DAG.getNode(
5342 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
5343 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
5344 }
5345
5346 return Val;
5347}
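
An editorial sketch of the scalar path above; the commented instructions restate the FMOV/CNT/ADDV/UMOV sequence from the comments and are only indicative:

  int popcount64(unsigned long long x) {
    // Expected lowering for i64:
    //   fmov d0, x0        ; copy the 64-bit integer into a vector register
    //   cnt  v0.8b, v0.8b  ; eight per-byte pop-counts
    //   addv b0, v0.8b     ; horizontal sum (the aarch64_neon_uaddlv intrinsic above)
    //   umov w0, v0.b[0]   ; copy the byte result back to an integer register
    return __builtin_popcountll(x);
  }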
5348
5349SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
5350
5351 if (Op.getValueType().isVector())
5352 return LowerVSETCC(Op, DAG);
5353
5354 bool IsStrict = Op->isStrictFPOpcode();
5355 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
5356 unsigned OpNo = IsStrict ? 1 : 0;
5357 SDValue Chain;
5358 if (IsStrict)
5359 Chain = Op.getOperand(0);
5360 SDValue LHS = Op.getOperand(OpNo + 0);
5361 SDValue RHS = Op.getOperand(OpNo + 1);
5362 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
5363 SDLoc dl(Op);
5364
5365 // We chose ZeroOrOneBooleanContents, so use zero and one.
5366 EVT VT = Op.getValueType();
5367 SDValue TVal = DAG.getConstant(1, dl, VT);
5368 SDValue FVal = DAG.getConstant(0, dl, VT);
5369
5370 // Handle f128 first, since one possible outcome is a normal integer
5371 // comparison which gets picked up by the next if statement.
5372 if (LHS.getValueType() == MVT::f128) {
5373 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
5374 IsSignaling);
5375
5376 // If softenSetCCOperands returned a scalar, use it.
5377 if (!RHS.getNode()) {
5378 assert(LHS.getValueType() == Op.getValueType() &&
5379        "Unexpected setcc expansion!");
5380 return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
5381 }
5382 }
5383
5384 if (LHS.getValueType().isInteger()) {
5385 SDValue CCVal;
5386 SDValue Cmp = getAArch64Cmp(
5387 LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
5388
5389 // Note that we inverted the condition above, so we reverse the order of
5390 // the true and false operands here. This will allow the setcc to be
5391 // matched to a single CSINC instruction.
5392 SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
5393 return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
5394 }
5395
5396 // Now we know we're dealing with FP values.
5397 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5398        LHS.getValueType() == MVT::f64);
5399
5400 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
5401 // and do the comparison.
5402 SDValue Cmp;
5403 if (IsStrict)
5404 Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
5405 else
5406 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5407
5408 AArch64CC::CondCode CC1, CC2;
5409 changeFPCCToAArch64CC(CC, CC1, CC2);
5410 SDValue Res;
5411 if (CC2 == AArch64CC::AL) {
5412 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
5413 CC2);
5414 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5415
5416 // Note that we inverted the condition above, so we reverse the order of
5417 // the true and false operands here. This will allow the setcc to be
5418 // matched to a single CSINC instruction.
5419 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
5420 } else {
5421 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
5422 // totally clean. Some of them require two CSELs to implement. As is in
5423 // this case, we emit the first CSEL and then emit a second using the output
5424 // of the first as the RHS. We're effectively OR'ing the two CC's together.
5425
5426 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
5427 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5428 SDValue CS1 =
5429 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5430
5431 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5432 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5433 }
5434 return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
5435}
5436
5437SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
5438 SDValue RHS, SDValue TVal,
5439 SDValue FVal, const SDLoc &dl,
5440 SelectionDAG &DAG) const {
5441 // Handle f128 first, because it will result in a comparison of some RTLIB
5442 // call result against zero.
5443 if (LHS.getValueType() == MVT::f128) {
1. Taking true branch
5444 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5445
5446 // If softenSetCCOperands returned a scalar, we need to compare the result
5447 // against zero to select between true and false values.
5448 if (!RHS.getNode()) {
2. Assuming the condition is false
3. Taking false branch
5449 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5450 CC = ISD::SETNE;
5451 }
5452 }
5453
5454 // Also handle f16, for which we need to do a f32 comparison.
5455 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4. Taking false branch
5456 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
5457 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
5458 }
5459
5460 // Next, handle integers.
5461 if (LHS.getValueType().isInteger()) {
5. Taking true branch
5462 assert((LHS.getValueType() == RHS.getValueType()) &&
5463        (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
5464
5465 unsigned Opcode = AArch64ISD::CSEL;
5466
5467 // If both the TVal and the FVal are constants, see if we can swap them in
5468 // order to form a CSINV or CSINC out of them.
5469 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
5470 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
6. Calling 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
21. Returning from 'dyn_cast<llvm::ConstantSDNode, llvm::SDValue>'
5471
5472 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
22. Assuming 'CTVal' is null
5473 std::swap(TVal, FVal);
5474 std::swap(CTVal, CFVal);
5475 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5476 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
22.1. 'CTVal' is null
5477 std::swap(TVal, FVal);
5478 std::swap(CTVal, CFVal);
5479 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5480 } else if (TVal.getOpcode() == ISD::XOR) {
23. Calling 'SDValue::getOpcode'
5481 // If TVal is a NOT we want to swap TVal and FVal so that we can match
5482 // with a CSINV rather than a CSEL.
5483 if (isAllOnesConstant(TVal.getOperand(1))) {
5484 std::swap(TVal, FVal);
5485 std::swap(CTVal, CFVal);
5486 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5487 }
5488 } else if (TVal.getOpcode() == ISD::SUB) {
5489 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
5490 // that we can match with a CSNEG rather than a CSEL.
5491 if (isNullConstant(TVal.getOperand(0))) {
5492 std::swap(TVal, FVal);
5493 std::swap(CTVal, CFVal);
5494 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5495 }
5496 } else if (CTVal && CFVal) {
5497 const int64_t TrueVal = CTVal->getSExtValue();
5498 const int64_t FalseVal = CFVal->getSExtValue();
5499 bool Swap = false;
5500
5501 // If both TVal and FVal are constants, see if FVal is the
5502 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
5503 // instead of a CSEL in that case.
5504 if (TrueVal == ~FalseVal) {
5505 Opcode = AArch64ISD::CSINV;
5506 } else if (TrueVal == -FalseVal) {
5507 Opcode = AArch64ISD::CSNEG;
5508 } else if (TVal.getValueType() == MVT::i32) {
5509 // If our operands are only 32-bit wide, make sure we use 32-bit
5510 // arithmetic for the check whether we can use CSINC. This ensures that
5511 // the addition in the check will wrap around properly in case there is
5512 // an overflow (which would not be the case if we do the check with
5513 // 64-bit arithmetic).
5514 const uint32_t TrueVal32 = CTVal->getZExtValue();
5515 const uint32_t FalseVal32 = CFVal->getZExtValue();
5516
5517 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
5518 Opcode = AArch64ISD::CSINC;
5519
5520 if (TrueVal32 > FalseVal32) {
5521 Swap = true;
5522 }
5523 }
5524 // 64-bit check whether we can use CSINC.
5525 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
5526 Opcode = AArch64ISD::CSINC;
5527
5528 if (TrueVal > FalseVal) {
5529 Swap = true;
5530 }
5531 }
5532
5533 // Swap TVal and FVal if necessary.
5534 if (Swap) {
5535 std::swap(TVal, FVal);
5536 std::swap(CTVal, CFVal);
5537 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5538 }
5539
5540 if (Opcode != AArch64ISD::CSEL) {
5541 // Drop FVal since we can get its value by simply inverting/negating
5542 // TVal.
5543 FVal = TVal;
5544 }
5545 }
5546
5547 // Avoid materializing a constant when possible by reusing a known value in
5548 // a register. However, don't perform this optimization if the known value
5549 // is one, zero or negative one in the case of a CSEL. We can always
5550 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
5551 // FVal, respectively.
5552 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
5553 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
5554 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
5555 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5556 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
5557 // "a != C ? x : a" to avoid materializing C.
5558 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
5559 TVal = LHS;
5560 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
5561 FVal = LHS;
5562 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
5563 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
5564 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
5565 // avoid materializing C.
5566 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5567 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
5568 Opcode = AArch64ISD::CSINV;
5569 TVal = LHS;
5570 FVal = DAG.getConstant(0, dl, FVal.getValueType());
5571 }
5572 }
5573
5574 SDValue CCVal;
5575 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
5576 EVT VT = TVal.getValueType();
5577 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
5578 }
5579
5580 // Now we know we're dealing with FP values.
5581 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5582        LHS.getValueType() == MVT::f64);
5583 assert(LHS.getValueType() == RHS.getValueType());
5584 EVT VT = TVal.getValueType();
5585 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5586
5587 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
5588 // clean. Some of them require two CSELs to implement.
5589 AArch64CC::CondCode CC1, CC2;
5590 changeFPCCToAArch64CC(CC, CC1, CC2);
5591
5592 if (DAG.getTarget().Options.UnsafeFPMath) {
5593 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
5594 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
5595 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
5596 if (RHSVal && RHSVal->isZero()) {
5597 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
5598 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
5599
5600 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
5601 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
5602 TVal = LHS;
5603 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
5604 CFVal && CFVal->isZero() &&
5605 FVal.getValueType() == LHS.getValueType())
5606 FVal = LHS;
5607 }
5608 }
5609
5610 // Emit first, and possibly only, CSEL.
5611 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5612 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5613
5614 // If we need a second CSEL, emit it, using the output of the first as the
5615 // RHS. We're effectively OR'ing the two CC's together.
5616 if (CC2 != AArch64CC::AL) {
5617 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5618 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5619 }
5620
5621 // Otherwise, return the output of the first CSEL.
5622 return CS1;
5623}
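
For orientation, an editorial summary of which conditional-select form the constant checks above choose, with invented example selects:

  //   TrueVal == ~FalseVal                 -> CSINV   e.g. c ? 5 : ~5
  //   TrueVal == -FalseVal                 -> CSNEG   e.g. c ? 7 : -7
  //   TrueVal == FalseVal + 1 (or vice
  //   versa, with the operands swapped)    -> CSINC   e.g. c ? 4 : 3
  //   anything else                        -> CSEL
  int selectExample(bool c) { return c ? 4 : 3; }  // expected to use the CSINC form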
5624
5625SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
5626 SelectionDAG &DAG) const {
5627 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5628 SDValue LHS = Op.getOperand(0);
5629 SDValue RHS = Op.getOperand(1);
5630 SDValue TVal = Op.getOperand(2);
5631 SDValue FVal = Op.getOperand(3);
5632 SDLoc DL(Op);
5633 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5634}
5635
5636SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
5637 SelectionDAG &DAG) const {
5638 SDValue CCVal = Op->getOperand(0);
5639 SDValue TVal = Op->getOperand(1);
5640 SDValue FVal = Op->getOperand(2);
5641 SDLoc DL(Op);
5642
5643 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
5644 // instruction.
5645 if (ISD::isOverflowIntrOpRes(CCVal)) {
5646 // Only lower legal XALUO ops.
5647 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
5648 return SDValue();
5649
5650 AArch64CC::CondCode OFCC;
5651 SDValue Value, Overflow;
5652 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
5653 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
5654
5655 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
5656 CCVal, Overflow);
5657 }
5658
5659 // Lower it the same way as we would lower a SELECT_CC node.
5660 ISD::CondCode CC;
5661 SDValue LHS, RHS;
5662 if (CCVal.getOpcode() == ISD::SETCC) {
5663 LHS = CCVal.getOperand(0);
5664 RHS = CCVal.getOperand(1);
5665 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
5666 } else {
5667 LHS = CCVal;
5668 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
5669 CC = ISD::SETNE;
5670 }
5671 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5672}
5673
5674SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
5675 SelectionDAG &DAG) const {
5676 // Jump table entries are emitted as PC-relative offsets. No additional
5677 // tweaking is necessary here. Just get the address of the jump table.
5678 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
5679
5680 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5681 !Subtarget->isTargetMachO()) {
5682 return getAddrLarge(JT, DAG);
5683 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5684 return getAddrTiny(JT, DAG);
5685 }
5686 return getAddr(JT, DAG);
5687}
5688
5689SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
5690 SelectionDAG &DAG) const {
5691 // Jump table entries are emitted as PC-relative offsets. No additional
5692 // tweaking is necessary here. Just get the address of the jump table.
5693 SDLoc DL(Op);
5694 SDValue JT = Op.getOperand(1);
5695 SDValue Entry = Op.getOperand(2);
5696 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
5697
5698 SDNode *Dest =
5699 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
5700 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
5701 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
5702 SDValue(Dest, 0));
5703}
5704
5705SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
5706 SelectionDAG &DAG) const {
5707 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
5708
5709 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
5710 // Use the GOT for the large code model on iOS.
5711 if (Subtarget->isTargetMachO()) {
5712 return getGOT(CP, DAG);
5713 }
5714 return getAddrLarge(CP, DAG);
5715 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5716 return getAddrTiny(CP, DAG);
5717 } else {
5718 return getAddr(CP, DAG);
5719 }
5720}
5721
5722SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
5723 SelectionDAG &DAG) const {
5724 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
5725 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5726 !Subtarget->isTargetMachO()) {
5727 return getAddrLarge(BA, DAG);
5728 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5729 return getAddrTiny(BA, DAG);
5730 }
5731 return getAddr(BA, DAG);
5732}
5733
5734SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
5735 SelectionDAG &DAG) const {
5736 AArch64FunctionInfo *FuncInfo =
5737 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5738
5739 SDLoc DL(Op);
5740 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
5741 getPointerTy(DAG.getDataLayout()));
5742 FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
5743 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5744 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5745 MachinePointerInfo(SV));
5746}
5747
5748SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
5749 SelectionDAG &DAG) const {
5750 AArch64FunctionInfo *FuncInfo =
5751 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5752
5753 SDLoc DL(Op);
5754 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
5755 ? FuncInfo->getVarArgsGPRIndex()
5756 : FuncInfo->getVarArgsStackIndex(),
5757 getPointerTy(DAG.getDataLayout()));
5758 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5759 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5760 MachinePointerInfo(SV));
5761}
5762
5763SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
5764 SelectionDAG &DAG) const {
5765 // The layout of the va_list struct is specified in the AArch64 Procedure Call
5766 // Standard, section B.3.
5767 MachineFunction &MF = DAG.getMachineFunction();
5768 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5769 auto PtrVT = getPointerTy(DAG.getDataLayout());
5770 SDLoc DL(Op);
5771
5772 SDValue Chain = Op.getOperand(0);
5773 SDValue VAList = Op.getOperand(1);
5774 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5775 SmallVector<SDValue, 4> MemOps;
5776
5777 // void *__stack at offset 0
5778 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
5779 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
5780 MachinePointerInfo(SV), /* Alignment = */ 8));
5781
5782 // void *__gr_top at offset 8
5783 int GPRSize = FuncInfo->getVarArgsGPRSize();
5784 if (GPRSize > 0) {
5785 SDValue GRTop, GRTopAddr;
5786
5787 GRTopAddr =
5788 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
5789
5790 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
5791 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
5792 DAG.getConstant(GPRSize, DL, PtrVT));
5793
5794 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
5795 MachinePointerInfo(SV, 8),
5796 /* Alignment = */ 8));
5797 }
5798
5799 // void *__vr_top at offset 16
5800 int FPRSize = FuncInfo->getVarArgsFPRSize();
5801 if (FPRSize > 0) {
5802 SDValue VRTop, VRTopAddr;
5803 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5804 DAG.getConstant(16, DL, PtrVT));
5805
5806 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
5807 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
5808 DAG.getConstant(FPRSize, DL, PtrVT));
5809
5810 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
5811 MachinePointerInfo(SV, 16),
5812 /* Alignment = */ 8));
5813 }
5814
5815 // int __gr_offs at offset 24
5816 SDValue GROffsAddr =
5817 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
5818 MemOps.push_back(DAG.getStore(
5819 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
5820 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
5821
5822 // int __vr_offs at offset 28
5823 SDValue VROffsAddr =
5824 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
5825 MemOps.push_back(DAG.getStore(
5826 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
5827 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
5828
5829 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5830}
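
For reference, the va_list layout these stores populate, written as a hypothetical C++ struct; the field names follow the comments above, the offsets come from the constants 0/8/16/24/28 in the code, and AAPCS64 section B.3 remains the normative definition:

  struct AAPCS64VaList {
    void *__stack;    // offset 0:  next stacked argument
    void *__gr_top;   // offset 8:  end of the general-purpose register save area
    void *__vr_top;   // offset 16: end of the FP/SIMD register save area
    int   __gr_offs;  // offset 24: negative byte offset into the GPR save area
    int   __vr_offs;  // offset 28: negative byte offset into the FP/SIMD save area
  };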
5831
5832SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
5833 SelectionDAG &DAG) const {
5834 MachineFunction &MF = DAG.getMachineFunction();
5835
5836 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
5837 return LowerWin64_VASTART(Op, DAG);
5838 else if (Subtarget->isTargetDarwin())
5839 return LowerDarwin_VASTART(Op, DAG);
5840 else
5841 return LowerAAPCS_VASTART(Op, DAG);
5842}
5843
5844SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
5845 SelectionDAG &DAG) const {
5846 // AAPCS has three pointers and two ints (= 32 bytes); Darwin and Windows
5847 // use a single pointer.
5848 SDLoc DL(Op);
5849 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
5850 unsigned VaListSize = (Subtarget->isTargetDarwin() ||
5851 Subtarget->isTargetWindows()) ? PtrSize : 32;
5852 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
5853 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
5854
5855 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
5856 DAG.getConstant(VaListSize, DL, MVT::i32),
5857 Align(PtrSize), false, false, false,
5858 MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
5859}
5860
5861SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
5862 assert(Subtarget->isTargetDarwin() &&
5863        "automatic va_arg instruction only works on Darwin");