Bug Summary

File: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 9902, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
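
The analyzer is flagging a shift whose count can reach 64 while the operand is only 64 bits wide; in C++ a shift count greater than or equal to the width of the (promoted) left operand is undefined behavior. The offending expression at line 9902 is not part of this excerpt, so the following is only a minimal illustrative sketch (the function names are hypothetical, not taken from the file) of the pattern and a guarded alternative:

    #include <cstdint>

    // Undefined behavior when Width == 64: the shift count equals the
    // width of unsigned long long.
    uint64_t lowBitsMaskUnsafe(unsigned Width) {
      return (1ULL << Width) - 1;
    }

    // Guarded version: handle the full-width case explicitly.
    uint64_t lowBitsMaskSafe(unsigned Width) {
      return Width >= 64 ? ~0ULL : (1ULL << Width) - 1;
    }

In LLVM code one common fix is to use a helper from llvm/Support/MathExtras.h (for example maskTrailingOnes<uint64_t>) rather than an open-coded shift, since those helpers handle the boundary case.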

Annotated Source Code

clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-11/lib/clang/11.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/include -I /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347=. -ferror-limit 19 -fmessage-length 0 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-03-09-184146-41876-1 -x c++ /build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64TargetLowering class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ISelLowering.h"
14#include "AArch64CallingConvention.h"
15#include "AArch64ExpandImm.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallSet.h"
27#include "llvm/ADT/SmallVector.h"
28#include "llvm/ADT/Statistic.h"
29#include "llvm/ADT/StringRef.h"
30#include "llvm/ADT/StringSwitch.h"
31#include "llvm/ADT/Triple.h"
32#include "llvm/ADT/Twine.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/CallingConvLower.h"
35#include "llvm/CodeGen/MachineBasicBlock.h"
36#include "llvm/CodeGen/MachineFrameInfo.h"
37#include "llvm/CodeGen/MachineFunction.h"
38#include "llvm/CodeGen/MachineInstr.h"
39#include "llvm/CodeGen/MachineInstrBuilder.h"
40#include "llvm/CodeGen/MachineMemOperand.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/IntrinsicInst.h"
60#include "llvm/IR/Intrinsics.h"
61#include "llvm/IR/IntrinsicsAArch64.h"
62#include "llvm/IR/Module.h"
63#include "llvm/IR/OperandTraits.h"
64#include "llvm/IR/PatternMatch.h"
65#include "llvm/IR/Type.h"
66#include "llvm/IR/Use.h"
67#include "llvm/IR/Value.h"
68#include "llvm/MC/MCRegisterInfo.h"
69#include "llvm/Support/Casting.h"
70#include "llvm/Support/CodeGen.h"
71#include "llvm/Support/CommandLine.h"
72#include "llvm/Support/Compiler.h"
73#include "llvm/Support/Debug.h"
74#include "llvm/Support/ErrorHandling.h"
75#include "llvm/Support/KnownBits.h"
76#include "llvm/Support/MachineValueType.h"
77#include "llvm/Support/MathExtras.h"
78#include "llvm/Support/raw_ostream.h"
79#include "llvm/Target/TargetMachine.h"
80#include "llvm/Target/TargetOptions.h"
81#include <algorithm>
82#include <bitset>
83#include <cassert>
84#include <cctype>
85#include <cstdint>
86#include <cstdlib>
87#include <iterator>
88#include <limits>
89#include <tuple>
90#include <utility>
91#include <vector>
92
93using namespace llvm;
94using namespace llvm::PatternMatch;
95
96#define DEBUG_TYPE "aarch64-lower"
97
98STATISTIC(NumTailCalls, "Number of tail calls");
99STATISTIC(NumShiftInserts, "Number of vector shift inserts");
100STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
101
102static cl::opt<bool>
103EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
104 cl::desc("Allow AArch64 SLI/SRI formation"),
105 cl::init(false));
106
107// FIXME: The necessary dtprel relocations don't seem to be supported
108// well in the GNU bfd and gold linkers at the moment. Therefore, by
109// default, for now, fall back to GeneralDynamic code generation.
110cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
111 "aarch64-elf-ldtls-generation", cl::Hidden,
112 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
113 cl::init(false));
114
115static cl::opt<bool>
116EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
117 cl::desc("Enable AArch64 logical imm instruction "
118 "optimization"),
119 cl::init(true));
120
121/// Value type used for condition codes.
122static const MVT MVT_CC = MVT::i32;
123
124AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
125 const AArch64Subtarget &STI)
126 : TargetLowering(TM), Subtarget(&STI) {
127 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
128 // we have to make something up. Arbitrarily, choose ZeroOrOne.
129 setBooleanContents(ZeroOrOneBooleanContent);
130 // When comparing vectors the result sets the different elements in the
131 // vector to all-one or all-zero.
132 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
133
134 // Set up the register classes.
135 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
136 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
137
138 if (Subtarget->hasFPARMv8()) {
139 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
140 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
141 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
142 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
143 }
144
145 if (Subtarget->hasNEON()) {
146 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
147 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
148 // Someone set us up the NEON.
149 addDRTypeForNEON(MVT::v2f32);
150 addDRTypeForNEON(MVT::v8i8);
151 addDRTypeForNEON(MVT::v4i16);
152 addDRTypeForNEON(MVT::v2i32);
153 addDRTypeForNEON(MVT::v1i64);
154 addDRTypeForNEON(MVT::v1f64);
155 addDRTypeForNEON(MVT::v4f16);
156
157 addQRTypeForNEON(MVT::v4f32);
158 addQRTypeForNEON(MVT::v2f64);
159 addQRTypeForNEON(MVT::v16i8);
160 addQRTypeForNEON(MVT::v8i16);
161 addQRTypeForNEON(MVT::v4i32);
162 addQRTypeForNEON(MVT::v2i64);
163 addQRTypeForNEON(MVT::v8f16);
164 }
165
166 if (Subtarget->hasSVE()) {
167 // Add legal sve predicate types
168 addRegisterClass(MVT::nxv2i1, &AArch64::PPRRegClass);
169 addRegisterClass(MVT::nxv4i1, &AArch64::PPRRegClass);
170 addRegisterClass(MVT::nxv8i1, &AArch64::PPRRegClass);
171 addRegisterClass(MVT::nxv16i1, &AArch64::PPRRegClass);
172
173 // Add legal sve data types
174 addRegisterClass(MVT::nxv16i8, &AArch64::ZPRRegClass);
175 addRegisterClass(MVT::nxv8i16, &AArch64::ZPRRegClass);
176 addRegisterClass(MVT::nxv4i32, &AArch64::ZPRRegClass);
177 addRegisterClass(MVT::nxv2i64, &AArch64::ZPRRegClass);
178
179 addRegisterClass(MVT::nxv2f16, &AArch64::ZPRRegClass);
180 addRegisterClass(MVT::nxv4f16, &AArch64::ZPRRegClass);
181 addRegisterClass(MVT::nxv8f16, &AArch64::ZPRRegClass);
182 addRegisterClass(MVT::nxv2f32, &AArch64::ZPRRegClass);
183 addRegisterClass(MVT::nxv4f32, &AArch64::ZPRRegClass);
184 addRegisterClass(MVT::nxv2f64, &AArch64::ZPRRegClass);
185
186 for (auto VT : { MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64 }) {
187 setOperationAction(ISD::SADDSAT, VT, Legal);
188 setOperationAction(ISD::UADDSAT, VT, Legal);
189 setOperationAction(ISD::SSUBSAT, VT, Legal);
190 setOperationAction(ISD::USUBSAT, VT, Legal);
191 setOperationAction(ISD::SMAX, VT, Legal);
192 setOperationAction(ISD::UMAX, VT, Legal);
193 setOperationAction(ISD::SMIN, VT, Legal);
194 setOperationAction(ISD::UMIN, VT, Legal);
195 }
196
197 for (auto VT :
198 { MVT::nxv2i8, MVT::nxv2i16, MVT::nxv2i32, MVT::nxv2i64, MVT::nxv4i8,
199 MVT::nxv4i16, MVT::nxv4i32, MVT::nxv8i8, MVT::nxv8i16 })
200 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Legal);
201 }
202
203 // Compute derived properties from the register classes
204 computeRegisterProperties(Subtarget->getRegisterInfo());
205
206 // Provide all sorts of operation actions
207 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
208 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
209 setOperationAction(ISD::SETCC, MVT::i32, Custom);
210 setOperationAction(ISD::SETCC, MVT::i64, Custom);
211 setOperationAction(ISD::SETCC, MVT::f16, Custom);
212 setOperationAction(ISD::SETCC, MVT::f32, Custom);
213 setOperationAction(ISD::SETCC, MVT::f64, Custom);
214 setOperationAction(ISD::STRICT_FSETCC, MVT::f16, Custom);
215 setOperationAction(ISD::STRICT_FSETCC, MVT::f32, Custom);
216 setOperationAction(ISD::STRICT_FSETCC, MVT::f64, Custom);
217 setOperationAction(ISD::STRICT_FSETCCS, MVT::f16, Custom);
218 setOperationAction(ISD::STRICT_FSETCCS, MVT::f32, Custom);
219 setOperationAction(ISD::STRICT_FSETCCS, MVT::f64, Custom);
220 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
221 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
222 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
223 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
224 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
225 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
226 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
227 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
228 setOperationAction(ISD::SELECT, MVT::i32, Custom);
229 setOperationAction(ISD::SELECT, MVT::i64, Custom);
230 setOperationAction(ISD::SELECT, MVT::f16, Custom);
231 setOperationAction(ISD::SELECT, MVT::f32, Custom);
232 setOperationAction(ISD::SELECT, MVT::f64, Custom);
233 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
234 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
235 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
236 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
237 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
238 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
239 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
240
241 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
242 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
243 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
244
245 setOperationAction(ISD::FREM, MVT::f32, Expand);
246 setOperationAction(ISD::FREM, MVT::f64, Expand);
247 setOperationAction(ISD::FREM, MVT::f80, Expand);
248
249 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
250
251 // Custom lowering hooks are needed for XOR
252 // to fold it into CSINC/CSINV.
253 setOperationAction(ISD::XOR, MVT::i32, Custom);
254 setOperationAction(ISD::XOR, MVT::i64, Custom);
255
256 // Virtually no operation on f128 is legal, but LLVM can't expand them when
257 // there's a valid register class, so we need custom operations in most cases.
258 setOperationAction(ISD::FABS, MVT::f128, Expand);
259 setOperationAction(ISD::FADD, MVT::f128, Custom);
260 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
261 setOperationAction(ISD::FCOS, MVT::f128, Expand);
262 setOperationAction(ISD::FDIV, MVT::f128, Custom);
263 setOperationAction(ISD::FMA, MVT::f128, Expand);
264 setOperationAction(ISD::FMUL, MVT::f128, Custom);
265 setOperationAction(ISD::FNEG, MVT::f128, Expand);
266 setOperationAction(ISD::FPOW, MVT::f128, Expand);
267 setOperationAction(ISD::FREM, MVT::f128, Expand);
268 setOperationAction(ISD::FRINT, MVT::f128, Expand);
269 setOperationAction(ISD::FSIN, MVT::f128, Expand);
270 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
271 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
272 setOperationAction(ISD::FSUB, MVT::f128, Custom);
273 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
274 setOperationAction(ISD::SETCC, MVT::f128, Custom);
275 setOperationAction(ISD::STRICT_FSETCC, MVT::f128, Custom);
276 setOperationAction(ISD::STRICT_FSETCCS, MVT::f128, Custom);
277 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
278 setOperationAction(ISD::SELECT, MVT::f128, Custom);
279 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
280 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
281
282 // Lowering for many of the conversions is actually specified by the non-f128
283 // type. The LowerXXX function will be trivial when f128 isn't involved.
284 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
285 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
286 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
287 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
288 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
289 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i128, Custom);
290 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
291 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
292 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
293 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
294 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
295 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i128, Custom);
296 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
297 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
298 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
299 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
300 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
301 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i128, Custom);
302 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
303 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
304 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
305 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
306 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
307 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i128, Custom);
308 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
309 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
310 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
311 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
312
313 // Variable arguments.
314 setOperationAction(ISD::VASTART, MVT::Other, Custom);
315 setOperationAction(ISD::VAARG, MVT::Other, Custom);
316 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
317 setOperationAction(ISD::VAEND, MVT::Other, Expand);
318
319 // Variable-sized objects.
320 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
321 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
322
323 if (Subtarget->isTargetWindows())
324 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
325 else
326 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
327
328 // Constant pool entries
329 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
330
331 // BlockAddress
332 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
333
334 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
335 setOperationAction(ISD::ADDC, MVT::i32, Custom);
336 setOperationAction(ISD::ADDE, MVT::i32, Custom);
337 setOperationAction(ISD::SUBC, MVT::i32, Custom);
338 setOperationAction(ISD::SUBE, MVT::i32, Custom);
339 setOperationAction(ISD::ADDC, MVT::i64, Custom);
340 setOperationAction(ISD::ADDE, MVT::i64, Custom);
341 setOperationAction(ISD::SUBC, MVT::i64, Custom);
342 setOperationAction(ISD::SUBE, MVT::i64, Custom);
343
344 // AArch64 lacks both left-rotate and popcount instructions.
345 setOperationAction(ISD::ROTL, MVT::i32, Expand);
346 setOperationAction(ISD::ROTL, MVT::i64, Expand);
347 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
348 setOperationAction(ISD::ROTL, VT, Expand);
349 setOperationAction(ISD::ROTR, VT, Expand);
350 }
351
352 // AArch64 doesn't have {U|S}MUL_LOHI.
353 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
354 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
355
356 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
357 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
358
359 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
360 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
361 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
362 setOperationAction(ISD::SDIVREM, VT, Expand);
363 setOperationAction(ISD::UDIVREM, VT, Expand);
364 }
365 setOperationAction(ISD::SREM, MVT::i32, Expand);
366 setOperationAction(ISD::SREM, MVT::i64, Expand);
367 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
368 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
369 setOperationAction(ISD::UREM, MVT::i32, Expand);
370 setOperationAction(ISD::UREM, MVT::i64, Expand);
371
372 // Custom lower Add/Sub/Mul with overflow.
373 setOperationAction(ISD::SADDO, MVT::i32, Custom);
374 setOperationAction(ISD::SADDO, MVT::i64, Custom);
375 setOperationAction(ISD::UADDO, MVT::i32, Custom);
376 setOperationAction(ISD::UADDO, MVT::i64, Custom);
377 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
378 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
379 setOperationAction(ISD::USUBO, MVT::i32, Custom);
380 setOperationAction(ISD::USUBO, MVT::i64, Custom);
381 setOperationAction(ISD::SMULO, MVT::i32, Custom);
382 setOperationAction(ISD::SMULO, MVT::i64, Custom);
383 setOperationAction(ISD::UMULO, MVT::i32, Custom);
384 setOperationAction(ISD::UMULO, MVT::i64, Custom);
385
386 setOperationAction(ISD::FSIN, MVT::f32, Expand);
387 setOperationAction(ISD::FSIN, MVT::f64, Expand);
388 setOperationAction(ISD::FCOS, MVT::f32, Expand);
389 setOperationAction(ISD::FCOS, MVT::f64, Expand);
390 setOperationAction(ISD::FPOW, MVT::f32, Expand);
391 setOperationAction(ISD::FPOW, MVT::f64, Expand);
392 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
393 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
394 if (Subtarget->hasFullFP16())
395 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
396 else
397 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
398
399 setOperationAction(ISD::FREM, MVT::f16, Promote);
400 setOperationAction(ISD::FREM, MVT::v4f16, Expand);
401 setOperationAction(ISD::FREM, MVT::v8f16, Expand);
402 setOperationAction(ISD::FPOW, MVT::f16, Promote);
403 setOperationAction(ISD::FPOW, MVT::v4f16, Expand);
404 setOperationAction(ISD::FPOW, MVT::v8f16, Expand);
405 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
406 setOperationAction(ISD::FPOWI, MVT::v4f16, Expand);
407 setOperationAction(ISD::FPOWI, MVT::v8f16, Expand);
408 setOperationAction(ISD::FCOS, MVT::f16, Promote);
409 setOperationAction(ISD::FCOS, MVT::v4f16, Expand);
410 setOperationAction(ISD::FCOS, MVT::v8f16, Expand);
411 setOperationAction(ISD::FSIN, MVT::f16, Promote);
412 setOperationAction(ISD::FSIN, MVT::v4f16, Expand);
413 setOperationAction(ISD::FSIN, MVT::v8f16, Expand);
414 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
415 setOperationAction(ISD::FSINCOS, MVT::v4f16, Expand);
416 setOperationAction(ISD::FSINCOS, MVT::v8f16, Expand);
417 setOperationAction(ISD::FEXP, MVT::f16, Promote);
418 setOperationAction(ISD::FEXP, MVT::v4f16, Expand);
419 setOperationAction(ISD::FEXP, MVT::v8f16, Expand);
420 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
421 setOperationAction(ISD::FEXP2, MVT::v4f16, Expand);
422 setOperationAction(ISD::FEXP2, MVT::v8f16, Expand);
423 setOperationAction(ISD::FLOG, MVT::f16, Promote);
424 setOperationAction(ISD::FLOG, MVT::v4f16, Expand);
425 setOperationAction(ISD::FLOG, MVT::v8f16, Expand);
426 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
427 setOperationAction(ISD::FLOG2, MVT::v4f16, Expand);
428 setOperationAction(ISD::FLOG2, MVT::v8f16, Expand);
429 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
430 setOperationAction(ISD::FLOG10, MVT::v4f16, Expand);
431 setOperationAction(ISD::FLOG10, MVT::v8f16, Expand);
432
433 if (!Subtarget->hasFullFP16()) {
434 setOperationAction(ISD::SELECT, MVT::f16, Promote);
435 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
436 setOperationAction(ISD::SETCC, MVT::f16, Promote);
437 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
438 setOperationAction(ISD::FADD, MVT::f16, Promote);
439 setOperationAction(ISD::FSUB, MVT::f16, Promote);
440 setOperationAction(ISD::FMUL, MVT::f16, Promote);
441 setOperationAction(ISD::FDIV, MVT::f16, Promote);
442 setOperationAction(ISD::FMA, MVT::f16, Promote);
443 setOperationAction(ISD::FNEG, MVT::f16, Promote);
444 setOperationAction(ISD::FABS, MVT::f16, Promote);
445 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
446 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
447 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
448 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
449 setOperationAction(ISD::FRINT, MVT::f16, Promote);
450 setOperationAction(ISD::FROUND, MVT::f16, Promote);
451 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
452 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
453 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
454 setOperationAction(ISD::FMINIMUM, MVT::f16, Promote);
455 setOperationAction(ISD::FMAXIMUM, MVT::f16, Promote);
456
457 // promote v4f16 to v4f32 when that is known to be safe.
458 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
459 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
460 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
461 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
462 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
463 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
464 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
465 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
466
467 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
468 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
469 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
470 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
471 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
472 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
473 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
474 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
475 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
476 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
477 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
478 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
479 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
480 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
481 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
482
483 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
484 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
485 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
486 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
487 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
488 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
489 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
490 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
491 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
492 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
493 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
494 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
495 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
496 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
497 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
498 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
499 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
500 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
501 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
502 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
503 }
504
505 // AArch64 has implementations of a lot of rounding-like FP operations.
506 for (MVT Ty : {MVT::f32, MVT::f64}) {
507 setOperationAction(ISD::FFLOOR, Ty, Legal);
508 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
509 setOperationAction(ISD::FCEIL, Ty, Legal);
510 setOperationAction(ISD::FRINT, Ty, Legal);
511 setOperationAction(ISD::FTRUNC, Ty, Legal);
512 setOperationAction(ISD::FROUND, Ty, Legal);
513 setOperationAction(ISD::FMINNUM, Ty, Legal);
514 setOperationAction(ISD::FMAXNUM, Ty, Legal);
515 setOperationAction(ISD::FMINIMUM, Ty, Legal);
516 setOperationAction(ISD::FMAXIMUM, Ty, Legal);
517 setOperationAction(ISD::LROUND, Ty, Legal);
518 setOperationAction(ISD::LLROUND, Ty, Legal);
519 setOperationAction(ISD::LRINT, Ty, Legal);
520 setOperationAction(ISD::LLRINT, Ty, Legal);
521 }
522
523 if (Subtarget->hasFullFP16()) {
524 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
525 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
526 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
527 setOperationAction(ISD::FRINT, MVT::f16, Legal);
528 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
529 setOperationAction(ISD::FROUND, MVT::f16, Legal);
530 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
531 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
532 setOperationAction(ISD::FMINIMUM, MVT::f16, Legal);
533 setOperationAction(ISD::FMAXIMUM, MVT::f16, Legal);
534 }
535
536 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
537
538 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
539
540 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
541 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
542 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
543 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
544 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
545
546 // 128-bit loads and stores can be done without expanding
547 setOperationAction(ISD::LOAD, MVT::i128, Custom);
548 setOperationAction(ISD::STORE, MVT::i128, Custom);
549
550 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of the
551 // custom lowering, as there are no un-paired non-temporal stores and
552 // legalization will break up 256 bit inputs.
553 setOperationAction(ISD::STORE, MVT::v32i8, Custom);
554 setOperationAction(ISD::STORE, MVT::v16i16, Custom);
555 setOperationAction(ISD::STORE, MVT::v16f16, Custom);
556 setOperationAction(ISD::STORE, MVT::v8i32, Custom);
557 setOperationAction(ISD::STORE, MVT::v8f32, Custom);
558 setOperationAction(ISD::STORE, MVT::v4f64, Custom);
559 setOperationAction(ISD::STORE, MVT::v4i64, Custom);
560
561 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
562 // This requires the Performance Monitors extension.
563 if (Subtarget->hasPerfMon())
564 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
565
566 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
567 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
568 // Issue __sincos_stret if available.
569 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
570 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
571 } else {
572 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
573 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
574 }
575
576 if (Subtarget->getTargetTriple().isOSMSVCRT()) {
577 // MSVCRT doesn't have powi; fall back to pow
578 setLibcallName(RTLIB::POWI_F32, nullptr);
579 setLibcallName(RTLIB::POWI_F64, nullptr);
580 }
581
582 // Make floating-point constants legal for the large code model, so they don't
583 // become loads from the constant pool.
584 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
585 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
586 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
587 }
588
589 // AArch64 does not have floating-point extending loads, i1 sign-extending
590 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
591 for (MVT VT : MVT::fp_valuetypes()) {
592 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
593 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
594 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
595 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
596 }
597 for (MVT VT : MVT::integer_valuetypes())
598 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
599
600 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
601 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
602 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
603 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
604 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
605 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
606 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
607
608 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
609 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
610
611 // Indexed loads and stores are supported.
612 for (unsigned im = (unsigned)ISD::PRE_INC;
613 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
614 setIndexedLoadAction(im, MVT::i8, Legal);
615 setIndexedLoadAction(im, MVT::i16, Legal);
616 setIndexedLoadAction(im, MVT::i32, Legal);
617 setIndexedLoadAction(im, MVT::i64, Legal);
618 setIndexedLoadAction(im, MVT::f64, Legal);
619 setIndexedLoadAction(im, MVT::f32, Legal);
620 setIndexedLoadAction(im, MVT::f16, Legal);
621 setIndexedStoreAction(im, MVT::i8, Legal);
622 setIndexedStoreAction(im, MVT::i16, Legal);
623 setIndexedStoreAction(im, MVT::i32, Legal);
624 setIndexedStoreAction(im, MVT::i64, Legal);
625 setIndexedStoreAction(im, MVT::f64, Legal);
626 setIndexedStoreAction(im, MVT::f32, Legal);
627 setIndexedStoreAction(im, MVT::f16, Legal);
628 }
629
630 // Trap.
631 setOperationAction(ISD::TRAP, MVT::Other, Legal);
632 if (Subtarget->isTargetWindows())
633 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
634
635 // We combine OR nodes for bitfield operations.
636 setTargetDAGCombine(ISD::OR);
637 // Try to create BICs for vector ANDs.
638 setTargetDAGCombine(ISD::AND);
639
640 // Vector add and sub nodes may conceal a high-half opportunity.
641 // Also, try to fold ADD into CSINC/CSINV..
642 setTargetDAGCombine(ISD::ADD);
643 setTargetDAGCombine(ISD::SUB);
644 setTargetDAGCombine(ISD::SRL);
645 setTargetDAGCombine(ISD::XOR);
646 setTargetDAGCombine(ISD::SINT_TO_FP);
647 setTargetDAGCombine(ISD::UINT_TO_FP);
648
649 setTargetDAGCombine(ISD::FP_TO_SINT);
650 setTargetDAGCombine(ISD::FP_TO_UINT);
651 setTargetDAGCombine(ISD::FDIV);
652
653 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
654
655 setTargetDAGCombine(ISD::ANY_EXTEND);
656 setTargetDAGCombine(ISD::ZERO_EXTEND);
657 setTargetDAGCombine(ISD::SIGN_EXTEND);
658 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
659 setTargetDAGCombine(ISD::CONCAT_VECTORS);
660 setTargetDAGCombine(ISD::STORE);
661 if (Subtarget->supportsAddressTopByteIgnored())
662 setTargetDAGCombine(ISD::LOAD);
663
664 setTargetDAGCombine(ISD::MUL);
665
666 setTargetDAGCombine(ISD::SELECT);
667 setTargetDAGCombine(ISD::VSELECT);
668
669 setTargetDAGCombine(ISD::INTRINSIC_VOID);
670 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
671 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
672
673 setTargetDAGCombine(ISD::GlobalAddress);
674
675 // In case of strict alignment, avoid an excessive number of byte wide stores.
676 MaxStoresPerMemsetOptSize = 8;
677 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
678 ? MaxStoresPerMemsetOptSize : 32;
679
680 MaxGluedStoresPerMemcpy = 4;
681 MaxStoresPerMemcpyOptSize = 4;
682 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
683 ? MaxStoresPerMemcpyOptSize : 16;
684
685 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
686
687 MaxLoadsPerMemcmpOptSize = 4;
688 MaxLoadsPerMemcmp = Subtarget->requiresStrictAlign()
689 ? MaxLoadsPerMemcmpOptSize : 8;
690
691 setStackPointerRegisterToSaveRestore(AArch64::SP);
692
693 setSchedulingPreference(Sched::Hybrid);
694
695 EnableExtLdPromotion = true;
696
697 // Set required alignment.
698 setMinFunctionAlignment(Align(4));
699 // Set preferred alignments.
700 setPrefLoopAlignment(Align(1ULL << STI.getPrefLoopLogAlignment()));
701 setPrefFunctionAlignment(Align(1ULL << STI.getPrefFunctionLogAlignment()));
702
703 // Only change the limit for entries in a jump table if specified by
704 // the sub target, but not at the command line.
705 unsigned MaxJT = STI.getMaximumJumpTableSize();
706 if (MaxJT && getMaximumJumpTableSize() == UINT_MAX)
707 setMaximumJumpTableSize(MaxJT);
708
709 setHasExtractBitsInsn(true);
710
711 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
712
713 if (Subtarget->hasNEON()) {
714 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
715 // silliness like this:
716 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
717 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
718 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
719 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
720 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
721 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
722 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
723 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
724 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
725 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
726 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
727 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
728 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
729 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
730 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
731 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
732 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
733 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
734 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
735 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
736 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
737 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
738 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
739 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
740 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
741
742 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
743 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
744 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
745 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
746 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
747
748 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
749
750 // AArch64 doesn't have a direct vector ->f32 conversion instructions for
751 // elements smaller than i32, so promote the input to i32 first.
752 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
753 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
754 // i8 vector elements also need promotion to i32 for v8i8
755 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
756 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
757 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
758 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
759 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
760 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
761 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
762 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
763 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
764 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
765 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
766
767 if (Subtarget->hasFullFP16()) {
768 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
769 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
770 setOperationAction(ISD::SINT_TO_FP, MVT::v8i16, Custom);
771 setOperationAction(ISD::UINT_TO_FP, MVT::v8i16, Custom);
772 } else {
773 // when AArch64 doesn't have fullfp16 support, promote the input
774 // to i32 first.
775 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
776 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
777 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
778 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
779 }
780
781 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
782 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
783
784 // AArch64 doesn't have MUL.2d:
785 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
786 // Custom handling for some quad-vector types to detect MULL.
787 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
788 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
789 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
790
791 for (MVT VT : { MVT::v8i8, MVT::v4i16, MVT::v2i32,
792 MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
793 // Vector reductions
794 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
795 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
796 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
797 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
798 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
799
800 // Saturates
801 setOperationAction(ISD::SADDSAT, VT, Legal);
802 setOperationAction(ISD::UADDSAT, VT, Legal);
803 setOperationAction(ISD::SSUBSAT, VT, Legal);
804 setOperationAction(ISD::USUBSAT, VT, Legal);
805 }
806 for (MVT VT : { MVT::v4f16, MVT::v2f32,
807 MVT::v8f16, MVT::v4f32, MVT::v2f64 }) {
808 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
809 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
810 }
811
812 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
813 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
814 // Likewise, narrowing and extending vector loads/stores aren't handled
815 // directly.
816 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
817 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
818
819 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
820 setOperationAction(ISD::MULHS, VT, Legal);
821 setOperationAction(ISD::MULHU, VT, Legal);
822 } else {
823 setOperationAction(ISD::MULHS, VT, Expand);
824 setOperationAction(ISD::MULHU, VT, Expand);
825 }
826 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
827 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
828
829 setOperationAction(ISD::BSWAP, VT, Expand);
830 setOperationAction(ISD::CTTZ, VT, Expand);
831
832 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
833 setTruncStoreAction(VT, InnerVT, Expand);
834 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
835 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
836 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
837 }
838 }
839
840 // AArch64 has implementations of a lot of rounding-like FP operations.
841 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
842 setOperationAction(ISD::FFLOOR, Ty, Legal);
843 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
844 setOperationAction(ISD::FCEIL, Ty, Legal);
845 setOperationAction(ISD::FRINT, Ty, Legal);
846 setOperationAction(ISD::FTRUNC, Ty, Legal);
847 setOperationAction(ISD::FROUND, Ty, Legal);
848 }
849
850 if (Subtarget->hasFullFP16()) {
851 for (MVT Ty : {MVT::v4f16, MVT::v8f16}) {
852 setOperationAction(ISD::FFLOOR, Ty, Legal);
853 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
854 setOperationAction(ISD::FCEIL, Ty, Legal);
855 setOperationAction(ISD::FRINT, Ty, Legal);
856 setOperationAction(ISD::FTRUNC, Ty, Legal);
857 setOperationAction(ISD::FROUND, Ty, Legal);
858 }
859 }
860
861 if (Subtarget->hasSVE())
862 setOperationAction(ISD::VSCALE, MVT::i32, Custom);
863
864 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
865 }
866
867 if (Subtarget->hasSVE()) {
868 // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
869 // splat of 0 or undef) once vector selects supported in SVE codegen. See
870 // D68877 for more details.
871 for (MVT VT : MVT::integer_scalable_vector_valuetypes()) {
872 if (isTypeLegal(VT))
873 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
874 }
875 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
876 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
877
878 for (MVT VT : MVT::fp_scalable_vector_valuetypes()) {
879 if (isTypeLegal(VT)) {
880 setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
881 }
882 }
883 }
884
885 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
886}
887
888void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
889 assert(VT.isVector() && "VT should be a vector type");
890
891 if (VT.isFloatingPoint()) {
892 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
893 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
894 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
895 }
896
897 // Mark vector float intrinsics as expand.
898 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
899 setOperationAction(ISD::FSIN, VT, Expand);
900 setOperationAction(ISD::FCOS, VT, Expand);
901 setOperationAction(ISD::FPOW, VT, Expand);
902 setOperationAction(ISD::FLOG, VT, Expand);
903 setOperationAction(ISD::FLOG2, VT, Expand);
904 setOperationAction(ISD::FLOG10, VT, Expand);
905 setOperationAction(ISD::FEXP, VT, Expand);
906 setOperationAction(ISD::FEXP2, VT, Expand);
907
908 // But we do support custom-lowering for FCOPYSIGN.
909 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
910 }
911
912 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
913 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
914 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
915 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
916 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
917 setOperationAction(ISD::SRA, VT, Custom);
918 setOperationAction(ISD::SRL, VT, Custom);
919 setOperationAction(ISD::SHL, VT, Custom);
920 setOperationAction(ISD::OR, VT, Custom);
921 setOperationAction(ISD::SETCC, VT, Custom);
922 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
923
924 setOperationAction(ISD::SELECT, VT, Expand);
925 setOperationAction(ISD::SELECT_CC, VT, Expand);
926 setOperationAction(ISD::VSELECT, VT, Expand);
927 for (MVT InnerVT : MVT::all_valuetypes())
928 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
929
930 // CNT supports only B element sizes, then use UADDLP to widen.
931 if (VT != MVT::v8i8 && VT != MVT::v16i8)
932 setOperationAction(ISD::CTPOP, VT, Custom);
933
934 setOperationAction(ISD::UDIV, VT, Expand);
935 setOperationAction(ISD::SDIV, VT, Expand);
936 setOperationAction(ISD::UREM, VT, Expand);
937 setOperationAction(ISD::SREM, VT, Expand);
938 setOperationAction(ISD::FREM, VT, Expand);
939
940 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
941 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
942
943 if (!VT.isFloatingPoint())
944 setOperationAction(ISD::ABS, VT, Legal);
945
946 // [SU][MIN|MAX] are available for all NEON types apart from i64.
947 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
948 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
949 setOperationAction(Opcode, VT, Legal);
950
951 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
952 if (VT.isFloatingPoint() &&
953 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
954 for (unsigned Opcode :
955 {ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINNUM, ISD::FMAXNUM})
956 setOperationAction(Opcode, VT, Legal);
957
958 if (Subtarget->isLittleEndian()) {
959 for (unsigned im = (unsigned)ISD::PRE_INC;
960 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
961 setIndexedLoadAction(im, VT, Legal);
962 setIndexedStoreAction(im, VT, Legal);
963 }
964 }
965}
966
967void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
968 addRegisterClass(VT, &AArch64::FPR64RegClass);
969 addTypeForNEON(VT, MVT::v2i32);
970}
971
972void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
973 addRegisterClass(VT, &AArch64::FPR128RegClass);
974 addTypeForNEON(VT, MVT::v4i32);
975}
976
977EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
978 EVT VT) const {
979 if (!VT.isVector())
980 return MVT::i32;
981 return VT.changeVectorElementTypeToInteger();
982}
983
984static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
985 const APInt &Demanded,
986 TargetLowering::TargetLoweringOpt &TLO,
987 unsigned NewOpc) {
988 uint64_t OldImm = Imm, NewImm, Enc;
989 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
990
991 // Return if the immediate is already all zeros, all ones, a bimm32 or a
992 // bimm64.
993 if (Imm == 0 || Imm == Mask ||
994 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
995 return false;
996
997 unsigned EltSize = Size;
998 uint64_t DemandedBits = Demanded.getZExtValue();
999
1000 // Clear bits that are not demanded.
1001 Imm &= DemandedBits;
1002
1003 while (true) {
1004 // The goal here is to set the non-demanded bits in a way that minimizes
1005 // the number of switching between 0 and 1. In order to achieve this goal,
1006 // we set the non-demanded bits to the value of the preceding demanded bits.
1007 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
1008 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
1009 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
1010 // The final result is 0b11000011.
1011 uint64_t NonDemandedBits = ~DemandedBits;
1012 uint64_t InvertedImm = ~Imm & DemandedBits;
1013 uint64_t RotatedImm =
1014 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
1015 NonDemandedBits;
1016 uint64_t Sum = RotatedImm + NonDemandedBits;
1017 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
1018 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
1019 NewImm = (Imm | Ones) & Mask;
1020
1021 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
1022 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
1023 // we halve the element size and continue the search.
1024 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
1025 break;
1026
1027 // We cannot shrink the element size any further if it is 2-bits.
1028 if (EltSize == 2)
1029 return false;
1030
1031 EltSize /= 2;
1032 Mask >>= EltSize;
1033 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
1034
1035 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
1036 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
1037 return false;
1038
1039 // Merge the upper and lower halves of Imm and DemandedBits.
1040 Imm |= Hi;
1041 DemandedBits |= DemandedBitsHi;
1042 }
1043
1044 ++NumOptimizedImms;
1045
1046 // Replicate the element across the register width.
1047 while (EltSize < Size) {
1048 NewImm |= NewImm << EltSize;
1049 EltSize *= 2;
1050 }
1051
1052 (void)OldImm;
1053 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
1054 "demanded bits should never be altered");
1055 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
1056
1057 // Create the new constant immediate node.
1058 EVT VT = Op.getValueType();
1059 SDLoc DL(Op);
1060 SDValue New;
1061
1062 // If the new constant immediate is all-zeros or all-ones, let the target
1063 // independent DAG combine optimize this node.
1064 if (NewImm == 0 || NewImm == OrigMask) {
1065 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
1066 TLO.DAG.getConstant(NewImm, DL, VT));
1067 // Otherwise, create a machine node so that target independent DAG combine
1068 // doesn't undo this optimization.
1069 } else {
1070 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
1071 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
1072 New = SDValue(
1073 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
1074 }
1075
1076 return TLO.CombineTo(Op, New);
1077}
1078
1079bool AArch64TargetLowering::targetShrinkDemandedConstant(
1080 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
1081 // Delay this optimization to as late as possible.
1082 if (!TLO.LegalOps)
1083 return false;
1084
1085 if (!EnableOptimizeLogicalImm)
1086 return false;
1087
1088 EVT VT = Op.getValueType();
1089 if (VT.isVector())
1090 return false;
1091
1092 unsigned Size = VT.getSizeInBits();
1093 assert((Size == 32 || Size == 64) &&
1094 "i32 or i64 is expected after legalization.");
1095
1096 // Exit early if we demand all bits.
1097 if (Demanded.countPopulation() == Size)
1098 return false;
1099
1100 unsigned NewOpc;
1101 switch (Op.getOpcode()) {
1102 default:
1103 return false;
1104 case ISD::AND:
1105 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
1106 break;
1107 case ISD::OR:
1108 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
1109 break;
1110 case ISD::XOR:
1111 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
1112 break;
1113 }
1114 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1115 if (!C)
1116 return false;
1117 uint64_t Imm = C->getZExtValue();
1118 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
1119}
1120
1121/// computeKnownBitsForTargetNode - Determine which of the bits specified in
1122/// Mask are known to be either zero or one and return them Known.
1123void AArch64TargetLowering::computeKnownBitsForTargetNode(
1124 const SDValue Op, KnownBits &Known,
1125 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
1126 switch (Op.getOpcode()) {
1127 default:
1128 break;
1129 case AArch64ISD::CSEL: {
1130 KnownBits Known2;
1131 Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1132 Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1133 Known.Zero &= Known2.Zero;
1134 Known.One &= Known2.One;
1135 break;
1136 }
1137 case AArch64ISD::LOADgot:
1138 case AArch64ISD::ADDlow: {
1139 if (!Subtarget->isTargetILP32())
1140 break;
1141 // In ILP32 mode all valid pointers are in the low 4GB of the address-space.
1142 Known.Zero = APInt::getHighBitsSet(64, 32);
1143 break;
1144 }
1145 case ISD::INTRINSIC_W_CHAIN: {
1146 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1147 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1148 switch (IntID) {
1149 default: return;
1150 case Intrinsic::aarch64_ldaxr:
1151 case Intrinsic::aarch64_ldxr: {
1152 unsigned BitWidth = Known.getBitWidth();
1153 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1154 unsigned MemBits = VT.getScalarSizeInBits();
1155 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1156 return;
1157 }
1158 }
1159 break;
1160 }
1161 case ISD::INTRINSIC_WO_CHAIN:
1162 case ISD::INTRINSIC_VOID: {
1163 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1164 switch (IntNo) {
1165 default:
1166 break;
1167 case Intrinsic::aarch64_neon_umaxv:
1168 case Intrinsic::aarch64_neon_uminv: {
1169 // Figure out the datatype of the vector operand. The UMINV instruction
1170 // will zero extend the result, so we can mark as known zero all the
1171 // bits larger than the element datatype. 32-bit or larger doesn't need
1172 // this as those are legal types and will be handled by isel directly.
1173 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1174 unsigned BitWidth = Known.getBitWidth();
1175 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1176 assert(BitWidth >= 8 && "Unexpected width!");
1177 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1178 Known.Zero |= Mask;
1179 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1180 assert(BitWidth >= 16 && "Unexpected width!");
1181 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1182 Known.Zero |= Mask;
1183 }
1184 break;
1185 } break;
1186 }
1187 }
1188 }
1189}
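// ---- Illustrative sketch (not part of the original LLVM source) ----
// The ldaxr/ldxr case above marks the bits beyond the loaded memory width as
// known zero, because the exclusive loads zero-extend into the destination
// register. A minimal standalone model of that mask computation, assuming
// nothing from LLVM (the helper name is hypothetical); the guards also avoid
// shifting by the full width of the type, which would be undefined:
#include <cstdint>
static uint64_t knownZeroMaskForZExtLoad(unsigned BitWidth, unsigned MemBits) {
  // Equivalent to APInt::getHighBitsSet(BitWidth, BitWidth - MemBits) for
  // BitWidth <= 64: the (BitWidth - MemBits) most significant bits are set.
  uint64_t WidthMask = BitWidth >= 64 ? ~0ULL : ((1ULL << BitWidth) - 1);
  uint64_t LowMask = MemBits >= 64 ? ~0ULL : ((1ULL << MemBits) - 1);
  return WidthMask & ~LowMask;
}
// e.g. knownZeroMaskForZExtLoad(64, 8) == 0xFFFFFFFFFFFFFF00 (ldxrb into a
// 64-bit register leaves the top 56 bits zero).
// ---------------------------------------------------------------------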
1190
1191MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1192 EVT) const {
1193 return MVT::i64;
1194}
1195
1196bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1197 EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1198 bool *Fast) const {
1199 if (Subtarget->requiresStrictAlign())
1200 return false;
1201
1202 if (Fast) {
1203 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1204 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1205 // See comments in performSTORECombine() for more details about
1206 // these conditions.
1207
1208 // Code that uses clang vector extensions can mark that it
1209 // wants unaligned accesses to be treated as fast by
1210 // underspecifying alignment to be 1 or 2.
1211 Align <= 2 ||
1212
1213 // Disregard v2i64. Memcpy lowering produces those and splitting
1214 // them regresses performance on micro-benchmarks and olden/bh.
1215 VT == MVT::v2i64;
1216 }
1217 return true;
1218}
1219
1220// Same as above but handling LLTs instead.
1221bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(
1222 LLT Ty, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
1223 bool *Fast) const {
1224 if (Subtarget->requiresStrictAlign())
1225 return false;
1226
1227 if (Fast) {
1228 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1229 *Fast = !Subtarget->isMisaligned128StoreSlow() ||
1230 Ty.getSizeInBytes() != 16 ||
1231 // See comments in performSTORECombine() for more details about
1232 // these conditions.
1233
1234 // Code that uses clang vector extensions can mark that it
1235 // wants unaligned accesses to be treated as fast by
1236 // underspecifying alignment to be 1 or 2.
1237 Align <= 2 ||
1238
1239 // Disregard v2i64. Memcpy lowering produces those and splitting
1240 // them regresses performance on micro-benchmarks and olden/bh.
1241 Ty == LLT::vector(2, 64);
1242 }
1243 return true;
1244}
1245
1246FastISel *
1247AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1248 const TargetLibraryInfo *libInfo) const {
1249 return AArch64::createFastISel(funcInfo, libInfo);
1250}
1251
1252const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1253 switch ((AArch64ISD::NodeType)Opcode) {
1254 case AArch64ISD::FIRST_NUMBER: break;
1255 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1256 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1257 case AArch64ISD::ADR: return "AArch64ISD::ADR";
1258 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1259 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1260 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1261 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1262 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1263 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1264 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1265 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1266 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1267 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1268 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1269 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1270 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1271 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1272 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1273 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1274 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1275 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1276 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1277 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1278 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1279 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1280 case AArch64ISD::STRICT_FCMP: return "AArch64ISD::STRICT_FCMP";
1281 case AArch64ISD::STRICT_FCMPE: return "AArch64ISD::STRICT_FCMPE";
1282 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1283 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1284 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1285 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1286 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1287 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1288 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1289 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1290 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1291 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1292 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1293 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1294 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1295 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1296 case AArch64ISD::BSP: return "AArch64ISD::BSP";
1297 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1298 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1299 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1300 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1301 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1302 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1303 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1304 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1305 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1306 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1307 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1308 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1309 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1310 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1311 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1312 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1313 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1314 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1315 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1316 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1317 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1318 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1319 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1320 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1321 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1322 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1323 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1324 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1325 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1326 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1327 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1328 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1329 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1330 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1331 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1332 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1333 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1334 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1335 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1336 case AArch64ISD::SMAXV_PRED: return "AArch64ISD::SMAXV_PRED";
1337 case AArch64ISD::UMAXV_PRED: return "AArch64ISD::UMAXV_PRED";
1338 case AArch64ISD::SMINV_PRED: return "AArch64ISD::SMINV_PRED";
1339 case AArch64ISD::UMINV_PRED: return "AArch64ISD::UMINV_PRED";
1340 case AArch64ISD::ORV_PRED: return "AArch64ISD::ORV_PRED";
1341 case AArch64ISD::EORV_PRED: return "AArch64ISD::EORV_PRED";
1342 case AArch64ISD::ANDV_PRED: return "AArch64ISD::ANDV_PRED";
1343 case AArch64ISD::CLASTA_N: return "AArch64ISD::CLASTA_N";
1344 case AArch64ISD::CLASTB_N: return "AArch64ISD::CLASTB_N";
1345 case AArch64ISD::LASTA: return "AArch64ISD::LASTA";
1346 case AArch64ISD::LASTB: return "AArch64ISD::LASTB";
1347 case AArch64ISD::REV: return "AArch64ISD::REV";
1348 case AArch64ISD::REINTERPRET_CAST: return "AArch64ISD::REINTERPRET_CAST";
1349 case AArch64ISD::TBL: return "AArch64ISD::TBL";
1350 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1351 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1352 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1353 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1354 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1355 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1356 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1357 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1358 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1359 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1360 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1361 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1362 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1363 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1364 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1365 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1366 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1367 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1368 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1369 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1370 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1371 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1372 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1373 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1374 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1375 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1376 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1377 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1378 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1379 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1380 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1381 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1382 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1383 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1384 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1385 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1386 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1387 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1388 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1389 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1390 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1391 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1392 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1393 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1394 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1395 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1396 case AArch64ISD::STG: return "AArch64ISD::STG";
1397 case AArch64ISD::STZG: return "AArch64ISD::STZG";
1398 case AArch64ISD::ST2G: return "AArch64ISD::ST2G";
1399 case AArch64ISD::STZ2G: return "AArch64ISD::STZ2G";
1400 case AArch64ISD::SUNPKHI: return "AArch64ISD::SUNPKHI";
1401 case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
1402 case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
1403 case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
1404 case AArch64ISD::INSR: return "AArch64ISD::INSR";
1405 case AArch64ISD::PTEST: return "AArch64ISD::PTEST";
1406 case AArch64ISD::PTRUE: return "AArch64ISD::PTRUE";
1407 case AArch64ISD::LDNF1: return "AArch64ISD::LDNF1";
1408 case AArch64ISD::LDNF1S: return "AArch64ISD::LDNF1S";
1409 case AArch64ISD::LDFF1: return "AArch64ISD::LDFF1";
1410 case AArch64ISD::LDFF1S: return "AArch64ISD::LDFF1S";
1411 case AArch64ISD::GLD1: return "AArch64ISD::GLD1";
1412 case AArch64ISD::GLD1_SCALED: return "AArch64ISD::GLD1_SCALED";
1413 case AArch64ISD::GLD1_SXTW: return "AArch64ISD::GLD1_SXTW";
1414 case AArch64ISD::GLD1_UXTW: return "AArch64ISD::GLD1_UXTW";
1415 case AArch64ISD::GLD1_SXTW_SCALED: return "AArch64ISD::GLD1_SXTW_SCALED";
1416 case AArch64ISD::GLD1_UXTW_SCALED: return "AArch64ISD::GLD1_UXTW_SCALED";
1417 case AArch64ISD::GLD1_IMM: return "AArch64ISD::GLD1_IMM";
1418 case AArch64ISD::GLD1S: return "AArch64ISD::GLD1S";
1419 case AArch64ISD::GLD1S_SCALED: return "AArch64ISD::GLD1S_SCALED";
1420 case AArch64ISD::GLD1S_SXTW: return "AArch64ISD::GLD1S_SXTW";
1421 case AArch64ISD::GLD1S_UXTW: return "AArch64ISD::GLD1S_UXTW";
1422 case AArch64ISD::GLD1S_SXTW_SCALED: return "AArch64ISD::GLD1S_SXTW_SCALED";
1423 case AArch64ISD::GLD1S_UXTW_SCALED: return "AArch64ISD::GLD1S_UXTW_SCALED";
1424 case AArch64ISD::GLD1S_IMM: return "AArch64ISD::GLD1S_IMM";
1425 case AArch64ISD::GLDFF1: return "AArch64ISD::GLDFF1";
1426 case AArch64ISD::GLDFF1_SCALED: return "AArch64ISD::GLDFF1_SCALED";
1427 case AArch64ISD::GLDFF1_SXTW: return "AArch64ISD::GLDFF1_SXTW";
1428 case AArch64ISD::GLDFF1_UXTW: return "AArch64ISD::GLDFF1_UXTW";
1429 case AArch64ISD::GLDFF1_SXTW_SCALED:return "AArch64ISD::GLDFF1_SXTW_SCALED";
1430 case AArch64ISD::GLDFF1_UXTW_SCALED:return "AArch64ISD::GLDFF1_UXTW_SCALED";
1431 case AArch64ISD::GLDFF1_IMM: return "AArch64ISD::GLDFF1_IMM";
1432 case AArch64ISD::GLDFF1S: return "AArch64ISD::GLDFF1S";
1433 case AArch64ISD::GLDFF1S_SCALED: return "AArch64ISD::GLDFF1S_SCALED";
1434 case AArch64ISD::GLDFF1S_SXTW: return "AArch64ISD::GLDFF1S_SXTW";
1435 case AArch64ISD::GLDFF1S_UXTW: return "AArch64ISD::GLDFF1S_UXTW";
1436 case AArch64ISD::GLDFF1S_SXTW_SCALED:
1437 return "AArch64ISD::GLDFF1S_SXTW_SCALED";
1438 case AArch64ISD::GLDFF1S_UXTW_SCALED:
1439 return "AArch64ISD::GLDFF1S_UXTW_SCALED";
1440 case AArch64ISD::GLDFF1S_IMM: return "AArch64ISD::GLDFF1S_IMM";
1441
1442 case AArch64ISD::GLDNT1: return "AArch64ISD::GLDNT1";
1443 case AArch64ISD::GLDNT1S: return "AArch64ISD::GLDNT1S";
1444
1445 case AArch64ISD::SST1: return "AArch64ISD::SST1";
1446 case AArch64ISD::SST1_SCALED: return "AArch64ISD::SST1_SCALED";
1447 case AArch64ISD::SST1_SXTW: return "AArch64ISD::SST1_SXTW";
1448 case AArch64ISD::SST1_UXTW: return "AArch64ISD::SST1_UXTW";
1449 case AArch64ISD::SST1_SXTW_SCALED: return "AArch64ISD::SST1_SXTW_SCALED";
1450 case AArch64ISD::SST1_UXTW_SCALED: return "AArch64ISD::SST1_UXTW_SCALED";
1451 case AArch64ISD::SST1_IMM: return "AArch64ISD::SST1_IMM";
1452
1453 case AArch64ISD::SSTNT1: return "AArch64ISD::SSTNT1";
1454
1455 case AArch64ISD::LDP: return "AArch64ISD::LDP";
1456 case AArch64ISD::STP: return "AArch64ISD::STP";
1457 case AArch64ISD::STNP: return "AArch64ISD::STNP";
1458 case AArch64ISD::DUP_PRED: return "AArch64ISD::DUP_PRED";
1459 case AArch64ISD::INDEX_VECTOR: return "AArch64ISD::INDEX_VECTOR";
1460 }
1461 return nullptr;
1462}
1463
1464MachineBasicBlock *
1465AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1466 MachineBasicBlock *MBB) const {
1467 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1468 // phi node:
1469
1470 // OrigBB:
1471 // [... previous instrs leading to comparison ...]
1472 // b.ne TrueBB
1473 // b EndBB
1474 // TrueBB:
1475 // ; Fallthrough
1476 // EndBB:
1477 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1478
1479 MachineFunction *MF = MBB->getParent();
1480 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1481 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1482 DebugLoc DL = MI.getDebugLoc();
1483 MachineFunction::iterator It = ++MBB->getIterator();
1484
1485 Register DestReg = MI.getOperand(0).getReg();
1486 Register IfTrueReg = MI.getOperand(1).getReg();
1487 Register IfFalseReg = MI.getOperand(2).getReg();
1488 unsigned CondCode = MI.getOperand(3).getImm();
1489 bool NZCVKilled = MI.getOperand(4).isKill();
1490
1491 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1492 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1493 MF->insert(It, TrueBB);
1494 MF->insert(It, EndBB);
1495
1496 // Transfer rest of current basic-block to EndBB
1497 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1498 MBB->end());
1499 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1500
1501 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1502 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1503 MBB->addSuccessor(TrueBB);
1504 MBB->addSuccessor(EndBB);
1505
1506 // TrueBB falls through to the end.
1507 TrueBB->addSuccessor(EndBB);
1508
1509 if (!NZCVKilled) {
1510 TrueBB->addLiveIn(AArch64::NZCV);
1511 EndBB->addLiveIn(AArch64::NZCV);
1512 }
1513
1514 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1515 .addReg(IfTrueReg)
1516 .addMBB(TrueBB)
1517 .addReg(IfFalseReg)
1518 .addMBB(MBB);
1519
1520 MI.eraseFromParent();
1521 return EndBB;
1522}
1523
1524MachineBasicBlock *AArch64TargetLowering::EmitLoweredCatchRet(
1525 MachineInstr &MI, MachineBasicBlock *BB) const {
1526  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
1527             BB->getParent()->getFunction().getPersonalityFn())) &&
1528         "SEH does not use catchret!");
1529 return BB;
1530}
1531
1532MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1533 MachineInstr &MI, MachineBasicBlock *BB) const {
1534 switch (MI.getOpcode()) {
1535 default:
1536#ifndef NDEBUG
1537 MI.dump();
1538#endif
1539    llvm_unreachable("Unexpected instruction for custom inserter!");
1540
1541 case AArch64::F128CSEL:
1542 return EmitF128CSEL(MI, BB);
1543
1544 case TargetOpcode::STACKMAP:
1545 case TargetOpcode::PATCHPOINT:
1546 return emitPatchPoint(MI, BB);
1547
1548 case AArch64::CATCHRET:
1549 return EmitLoweredCatchRet(MI, BB);
1550 }
1551}
1552
1553//===----------------------------------------------------------------------===//
1554// AArch64 Lowering private implementation.
1555//===----------------------------------------------------------------------===//
1556
1557//===----------------------------------------------------------------------===//
1558// Lowering Code
1559//===----------------------------------------------------------------------===//
1560
1561/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1562/// CC
1563static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1564 switch (CC) {
1565 default:
1566    llvm_unreachable("Unknown condition code!");
1567 case ISD::SETNE:
1568 return AArch64CC::NE;
1569 case ISD::SETEQ:
1570 return AArch64CC::EQ;
1571 case ISD::SETGT:
1572 return AArch64CC::GT;
1573 case ISD::SETGE:
1574 return AArch64CC::GE;
1575 case ISD::SETLT:
1576 return AArch64CC::LT;
1577 case ISD::SETLE:
1578 return AArch64CC::LE;
1579 case ISD::SETUGT:
1580 return AArch64CC::HI;
1581 case ISD::SETUGE:
1582 return AArch64CC::HS;
1583 case ISD::SETULT:
1584 return AArch64CC::LO;
1585 case ISD::SETULE:
1586 return AArch64CC::LS;
1587 }
1588}
1589
1590/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1591static void changeFPCCToAArch64CC(ISD::CondCode CC,
1592 AArch64CC::CondCode &CondCode,
1593 AArch64CC::CondCode &CondCode2) {
1594 CondCode2 = AArch64CC::AL;
1595 switch (CC) {
1596 default:
1597    llvm_unreachable("Unknown FP condition!");
1598 case ISD::SETEQ:
1599 case ISD::SETOEQ:
1600 CondCode = AArch64CC::EQ;
1601 break;
1602 case ISD::SETGT:
1603 case ISD::SETOGT:
1604 CondCode = AArch64CC::GT;
1605 break;
1606 case ISD::SETGE:
1607 case ISD::SETOGE:
1608 CondCode = AArch64CC::GE;
1609 break;
1610 case ISD::SETOLT:
1611 CondCode = AArch64CC::MI;
1612 break;
1613 case ISD::SETOLE:
1614 CondCode = AArch64CC::LS;
1615 break;
1616 case ISD::SETONE:
1617 CondCode = AArch64CC::MI;
1618 CondCode2 = AArch64CC::GT;
1619 break;
1620 case ISD::SETO:
1621 CondCode = AArch64CC::VC;
1622 break;
1623 case ISD::SETUO:
1624 CondCode = AArch64CC::VS;
1625 break;
1626 case ISD::SETUEQ:
1627 CondCode = AArch64CC::EQ;
1628 CondCode2 = AArch64CC::VS;
1629 break;
1630 case ISD::SETUGT:
1631 CondCode = AArch64CC::HI;
1632 break;
1633 case ISD::SETUGE:
1634 CondCode = AArch64CC::PL;
1635 break;
1636 case ISD::SETLT:
1637 case ISD::SETULT:
1638 CondCode = AArch64CC::LT;
1639 break;
1640 case ISD::SETLE:
1641 case ISD::SETULE:
1642 CondCode = AArch64CC::LE;
1643 break;
1644 case ISD::SETNE:
1645 case ISD::SETUNE:
1646 CondCode = AArch64CC::NE;
1647 break;
1648 }
1649}
1650
1651/// Convert a DAG fp condition code to an AArch64 CC.
1652/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1653/// should be AND'ed instead of OR'ed.
1654static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1655 AArch64CC::CondCode &CondCode,
1656 AArch64CC::CondCode &CondCode2) {
1657 CondCode2 = AArch64CC::AL;
1658 switch (CC) {
1659 default:
1660 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1661    assert(CondCode2 == AArch64CC::AL);
1662 break;
1663 case ISD::SETONE:
1664 // (a one b)
1665 // == ((a olt b) || (a ogt b))
1666 // == ((a ord b) && (a une b))
1667 CondCode = AArch64CC::VC;
1668 CondCode2 = AArch64CC::NE;
1669 break;
1670 case ISD::SETUEQ:
1671 // (a ueq b)
1672 // == ((a uno b) || (a oeq b))
1673 // == ((a ule b) && (a uge b))
1674 CondCode = AArch64CC::PL;
1675 CondCode2 = AArch64CC::LE;
1676 break;
1677 }
1678}
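// ---- Illustrative sketch (not part of the original LLVM source) ----
// The identities quoted above, e.g. (a one b) == ((a ord b) && (a une b)),
// can be sanity-checked with plain doubles and IEEE NaN semantics. This is
// only a hedged sketch in terms of <cmath>; it does not touch AArch64
// condition codes, and the helper name is hypothetical.
#include <cmath>
static bool checkOneDecomposition(double a, double b) {
  bool Ord = !std::isnan(a) && !std::isnan(b);          // (a ord b)
  bool One = Ord && a != b;                             // ordered, not equal
  bool Une = std::isnan(a) || std::isnan(b) || a != b;  // unordered or not equal
  return One == (Ord && Une);
}
// checkOneDecomposition(1.0, 2.0), (1.0, 1.0) and (NAN, 1.0) all return true.
// ---------------------------------------------------------------------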
1679
1680/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1681/// CC usable with the vector instructions. Fewer operations are available
1682/// without a real NZCV register, so we have to use less efficient combinations
1683/// to get the same effect.
1684static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1685 AArch64CC::CondCode &CondCode,
1686 AArch64CC::CondCode &CondCode2,
1687 bool &Invert) {
1688 Invert = false;
1689 switch (CC) {
1690 default:
1691 // Mostly the scalar mappings work fine.
1692 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1693 break;
1694 case ISD::SETUO:
1695 Invert = true;
1696    LLVM_FALLTHROUGH;
1697 case ISD::SETO:
1698 CondCode = AArch64CC::MI;
1699 CondCode2 = AArch64CC::GE;
1700 break;
1701 case ISD::SETUEQ:
1702 case ISD::SETULT:
1703 case ISD::SETULE:
1704 case ISD::SETUGT:
1705 case ISD::SETUGE:
1706 // All of the compare-mask comparisons are ordered, but we can switch
1707 // between the two by a double inversion. E.g. ULE == !OGT.
1708 Invert = true;
1709 changeFPCCToAArch64CC(getSetCCInverse(CC, /* FP inverse */ MVT::f32),
1710 CondCode, CondCode2);
1711 break;
1712 }
1713}
1714
1715static bool isLegalArithImmed(uint64_t C) {
1716 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1717 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1718  LLVM_DEBUG(dbgs() << "Is imm " << C
1719                    << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1720 return IsLegal;
1721}
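// ---- Illustrative sketch (not part of the original LLVM source) ----
// The test above accepts exactly the immediates that fit an AArch64 add/sub
// (and therefore cmp/cmn) instruction: a 12-bit value, optionally shifted
// left by 12. A standalone copy for experimentation (the name is
// hypothetical):
#include <cstdint>
static bool sketchIsLegalArithImmed(uint64_t C) {
  return (C >> 12 == 0) ||                      // imm12, no shift
         ((C & 0xFFFULL) == 0 && C >> 24 == 0); // imm12, shifted left by 12
}
// sketchIsLegalArithImmed(4095) -> true  (largest unshifted immediate)
// sketchIsLegalArithImmed(4096) -> true  (1 << 12)
// sketchIsLegalArithImmed(4097) -> false (needs bits in both halves)
// ---------------------------------------------------------------------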
1722
1723// Can a (CMP op1, (sub 0, op2)) be turned into a CMN instruction on
1724// the grounds that "op1 - (-op2) == op1 + op2" ? Not always, the C and V flags
1725// can be set differently by this operation. It comes down to whether
1726// "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1727// everything is fine. If not then the optimization is wrong. Thus general
1728// comparisons are only valid if op2 != 0.
1729//
1730// So, finally, the only LLVM-native comparisons that don't mention C and V
1731// are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1732// the absence of information about op2.
1733static bool isCMN(SDValue Op, ISD::CondCode CC) {
1734 return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
1735 (CC == ISD::SETEQ || CC == ISD::SETNE);
1736}
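// ---- Illustrative sketch (not part of the original LLVM source) ----
// A concrete instance of the caveat above: when op2 == 0, the carry flag
// produced by "CMP op1, (0 - op2)" (a SUBS) and by "CMN op1, op2" (an ADDS)
// differ, so only EQ/NE, which ignore C and V, are safe in general. A small
// model of the 32-bit carry computation (names are hypothetical):
#include <cstdint>
static bool carryOfSubs(uint32_t A, uint32_t B) {
  return A >= B;                                    // C set means no borrow
}
static bool carryOfAdds(uint32_t A, uint32_t B) {
  return (uint64_t)A + (uint64_t)B > 0xFFFFFFFFULL; // C set means carry out
}
// With op1 = 1, op2 = 0:
//   CMP 1, (0 - 0): carryOfSubs(1, 0) == true  -> an HS branch is taken
//   CMN 1, 0:       carryOfAdds(1, 0) == false -> an HS branch is not taken
// so lowering an unsigned comparison through CMN would be wrong here.
// ---------------------------------------------------------------------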
1737
1738static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
1739 SelectionDAG &DAG, SDValue Chain,
1740 bool IsSignaling) {
1741 EVT VT = LHS.getValueType();
1742  assert(VT != MVT::f128);
1743  assert(VT != MVT::f16 && "Lowering of strict fp16 not yet implemented");
1744 unsigned Opcode =
1745 IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
1746 return DAG.getNode(Opcode, dl, {VT, MVT::Other}, {Chain, LHS, RHS});
1747}
1748
1749static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1750 const SDLoc &dl, SelectionDAG &DAG) {
1751 EVT VT = LHS.getValueType();
1752 const bool FullFP16 =
1753 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1754
1755 if (VT.isFloatingPoint()) {
1756    assert(VT != MVT::f128);
1757 if (VT == MVT::f16 && !FullFP16) {
1758 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1759 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1760 VT = MVT::f32;
1761 }
1762 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1763 }
1764
1765 // The CMP instruction is just an alias for SUBS, and representing it as
1766 // SUBS means that it's possible to get CSE with subtract operations.
1767 // A later phase can perform the optimization of setting the destination
1768 // register to WZR/XZR if it ends up being unused.
1769 unsigned Opcode = AArch64ISD::SUBS;
1770
1771 if (isCMN(RHS, CC)) {
1772    // Can we combine a (CMP op1, (sub 0, op2)) into a CMN instruction?
1773 Opcode = AArch64ISD::ADDS;
1774 RHS = RHS.getOperand(1);
1775 } else if (isCMN(LHS, CC)) {
1776    // As we are looking for EQ/NE compares, the operands can be commuted; can
1777    // we combine a (CMP (sub 0, op1), op2) into a CMN instruction?
1778 Opcode = AArch64ISD::ADDS;
1779 LHS = LHS.getOperand(1);
1780 } else if (isNullConstant(RHS) && !isUnsignedIntSetCC(CC)) {
1781 if (LHS.getOpcode() == ISD::AND) {
1782 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1783 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1784 // of the signed comparisons.
1785 const SDValue ANDSNode = DAG.getNode(AArch64ISD::ANDS, dl,
1786 DAG.getVTList(VT, MVT_CC),
1787 LHS.getOperand(0),
1788 LHS.getOperand(1));
1789 // Replace all users of (and X, Y) with newly generated (ands X, Y)
1790 DAG.ReplaceAllUsesWith(LHS, ANDSNode);
1791 return ANDSNode.getValue(1);
1792 } else if (LHS.getOpcode() == AArch64ISD::ANDS) {
1793 // Use result of ANDS
1794 return LHS.getValue(1);
1795 }
1796 }
1797
1798 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1799 .getValue(1);
1800}
1801
1802/// \defgroup AArch64CCMP CMP;CCMP matching
1803///
1804/// These functions deal with the formation of CMP;CCMP;... sequences.
1805/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1806/// a comparison. They set the NZCV flags to a predefined value if their
1807/// predicate is false. This allows expressing arbitrary conjunctions, for
1808/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1809/// expressed as:
1810/// cmp A
1811/// ccmp B, inv(CB), CA
1812/// check for CB flags
1813///
1814/// This naturally lets us implement chains of AND operations with SETCC
1815/// operands. And we can even implement some other situations by transforming
1816/// them:
1817/// - We can implement (NEG SETCC) i.e. negating a single comparison by
1818/// negating the flags used in a CCMP/FCCMP operations.
1819/// - We can negate the result of a whole chain of CMP/CCMP/FCCMP operations
1820/// by negating the flags we test for afterwards. i.e.
1821/// NEG (CMP CCMP CCCMP ...) can be implemented.
1822/// - Note that we can only ever negate all previously processed results.
1823/// What we can not implement by flipping the flags to test is a negation
1824/// of two sub-trees (because the negation affects all sub-trees emitted so
1825/// far, so the 2nd sub-tree we emit would also affect the first).
1826/// With those tools we can implement some OR operations:
1827/// - (OR (SETCC A) (SETCC B)) can be implemented via:
1828/// NEG (AND (NEG (SETCC A)) (NEG (SETCC B)))
1829/// - After transforming OR to NEG/AND combinations we may be able to use NEG
1830/// elimination rules from earlier to implement the whole thing as a
1831/// CCMP/FCCMP chain.
1832///
1833/// As complete example:
1834/// or (or (setCA (cmp A)) (setCB (cmp B)))
1835/// (and (setCC (cmp C)) (setCD (cmp D)))"
1836/// can be reassociated to:
1837///   or (and (setCC (cmp C)) (setCD (cmp D)))
1838///      (or (setCA (cmp A)) (setCB (cmp B)))
1839/// can be transformed to:
1840/// not (and (not (and (setCC (cmp C)) (setCD (cmp D))))
1841/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1842/// which can be implemented as:
1843/// cmp C
1844/// ccmp D, inv(CD), CC
1845/// ccmp A, CA, inv(CD)
1846/// ccmp B, CB, inv(CA)
1847/// check for CB flags
1848///
1849/// A counterexample is "or (and A B) (and C D)" which translates to
1850/// not (and (not (and (not A) (not B))) (not (and (not C) (not D)))), we
1851/// can only implement 1 of the inner (not) operations, but not both!
1852/// @{
1853
1854/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1855static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1856 ISD::CondCode CC, SDValue CCOp,
1857 AArch64CC::CondCode Predicate,
1858 AArch64CC::CondCode OutCC,
1859 const SDLoc &DL, SelectionDAG &DAG) {
1860 unsigned Opcode = 0;
1861 const bool FullFP16 =
1862 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1863
1864 if (LHS.getValueType().isFloatingPoint()) {
1865    assert(LHS.getValueType() != MVT::f128);
1866 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1867 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1868 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1869 }
1870 Opcode = AArch64ISD::FCCMP;
1871 } else if (RHS.getOpcode() == ISD::SUB) {
1872 SDValue SubOp0 = RHS.getOperand(0);
1873 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1874 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1875 Opcode = AArch64ISD::CCMN;
1876 RHS = RHS.getOperand(1);
1877 }
1878 }
1879 if (Opcode == 0)
1880 Opcode = AArch64ISD::CCMP;
1881
1882 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1883 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1884 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1885 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1886 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1887}
1888
1889/// Returns true if @p Val is a tree of AND/OR/SETCC operations that can be
1890/// expressed as a conjunction. See \ref AArch64CCMP.
1891/// \param CanNegate Set to true if we can negate the whole sub-tree just by
1892/// changing the conditions on the SETCC tests.
1893/// (this means we can call emitConjunctionRec() with
1894/// Negate==true on this sub-tree)
1895/// \param MustBeFirst Set to true if this subtree needs to be negated and we
1896/// cannot do the negation naturally. We are required to
1897/// emit the subtree first in this case.
1898/// \param WillNegate Is true if we are called when the result of this
1899/// subexpression must be negated. This happens when the
1900/// outer expression is an OR. We can use this fact to know
1901/// that we have a double negation (or (or ...) ...) that
1902/// can be implemented for free.
1903static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
1904 bool &MustBeFirst, bool WillNegate,
1905 unsigned Depth = 0) {
1906 if (!Val.hasOneUse())
1907 return false;
1908 unsigned Opcode = Val->getOpcode();
1909 if (Opcode == ISD::SETCC) {
1910 if (Val->getOperand(0).getValueType() == MVT::f128)
1911 return false;
1912 CanNegate = true;
1913 MustBeFirst = false;
1914 return true;
1915 }
1916 // Protect against exponential runtime and stack overflow.
1917 if (Depth > 6)
1918 return false;
1919 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1920 bool IsOR = Opcode == ISD::OR;
1921 SDValue O0 = Val->getOperand(0);
1922 SDValue O1 = Val->getOperand(1);
1923 bool CanNegateL;
1924 bool MustBeFirstL;
1925 if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
1926 return false;
1927 bool CanNegateR;
1928 bool MustBeFirstR;
1929 if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
1930 return false;
1931
1932 if (MustBeFirstL && MustBeFirstR)
1933 return false;
1934
1935 if (IsOR) {
1936 // For an OR expression we need to be able to naturally negate at least
1937 // one side or we cannot do the transformation at all.
1938 if (!CanNegateL && !CanNegateR)
1939 return false;
1940      // If the result of the OR will be negated and we can naturally negate
1941      // the leaves, then this sub-tree as a whole negates naturally.
1942 CanNegate = WillNegate && CanNegateL && CanNegateR;
1943 // If we cannot naturally negate the whole sub-tree, then this must be
1944 // emitted first.
1945 MustBeFirst = !CanNegate;
1946 } else {
1947      assert(Opcode == ISD::AND && "Must be OR or AND");
1948 // We cannot naturally negate an AND operation.
1949 CanNegate = false;
1950 MustBeFirst = MustBeFirstL || MustBeFirstR;
1951 }
1952 return true;
1953 }
1954 return false;
1955}
1956
1957/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1958/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1959/// Tries to transform the given i1 producing node @p Val to a series of compare
1960/// and conditional compare operations. @returns an NZCV flags producing node
1961/// and sets @p OutCC to the flags that should be tested or returns SDValue() if
1962/// the transformation was not possible.
1963/// \p Negate is true if we want this sub-tree to be negated just by changing
1964/// SETCC conditions.
1965static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
1966 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1967 AArch64CC::CondCode Predicate) {
1968 // We're at a tree leaf, produce a conditional comparison operation.
1969 unsigned Opcode = Val->getOpcode();
1970 if (Opcode == ISD::SETCC) {
1971 SDValue LHS = Val->getOperand(0);
1972 SDValue RHS = Val->getOperand(1);
1973 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1974 bool isInteger = LHS.getValueType().isInteger();
1975 if (Negate)
1976 CC = getSetCCInverse(CC, LHS.getValueType());
1977 SDLoc DL(Val);
1978 // Determine OutCC and handle FP special case.
1979 if (isInteger) {
1980 OutCC = changeIntCCToAArch64CC(CC);
1981 } else {
1982      assert(LHS.getValueType().isFloatingPoint());
1983 AArch64CC::CondCode ExtraCC;
1984 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1985 // Some floating point conditions can't be tested with a single condition
1986 // code. Construct an additional comparison in this case.
1987 if (ExtraCC != AArch64CC::AL) {
1988 SDValue ExtraCmp;
1989 if (!CCOp.getNode())
1990 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1991 else
1992 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1993 ExtraCC, DL, DAG);
1994 CCOp = ExtraCmp;
1995 Predicate = ExtraCC;
1996 }
1997 }
1998
1999 // Produce a normal comparison if we are first in the chain
2000 if (!CCOp)
2001 return emitComparison(LHS, RHS, CC, DL, DAG);
2002 // Otherwise produce a ccmp.
2003 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
2004 DAG);
2005 }
2006  assert(Val->hasOneUse() && "Valid conjunction/disjunction tree");
2007
2008 bool IsOR = Opcode == ISD::OR;
2009
2010 SDValue LHS = Val->getOperand(0);
2011 bool CanNegateL;
2012 bool MustBeFirstL;
2013 bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
2014  assert(ValidL && "Valid conjunction/disjunction tree");
2015 (void)ValidL;
2016
2017 SDValue RHS = Val->getOperand(1);
2018 bool CanNegateR;
2019 bool MustBeFirstR;
2020 bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
2021  assert(ValidR && "Valid conjunction/disjunction tree");
2022 (void)ValidR;
2023
2024 // Swap sub-tree that must come first to the right side.
2025 if (MustBeFirstL) {
2026    assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
2027 std::swap(LHS, RHS);
2028 std::swap(CanNegateL, CanNegateR);
2029 std::swap(MustBeFirstL, MustBeFirstR);
2030 }
2031
2032 bool NegateR;
2033 bool NegateAfterR;
2034 bool NegateL;
2035 bool NegateAfterAll;
2036 if (Opcode == ISD::OR) {
2037 // Swap the sub-tree that we can negate naturally to the left.
2038 if (!CanNegateL) {
2039      assert(CanNegateR && "at least one side must be negatable");
2040      assert(!MustBeFirstR && "invalid conjunction/disjunction tree");
2041      assert(!Negate);
2042 std::swap(LHS, RHS);
2043 NegateR = false;
2044 NegateAfterR = true;
2045 } else {
2046 // Negate the left sub-tree if possible, otherwise negate the result.
2047 NegateR = CanNegateR;
2048 NegateAfterR = !CanNegateR;
2049 }
2050 NegateL = true;
2051 NegateAfterAll = !Negate;
2052 } else {
2053    assert(Opcode == ISD::AND && "Valid conjunction/disjunction tree");
2054    assert(!Negate && "Valid conjunction/disjunction tree");
2055
2056 NegateL = false;
2057 NegateR = false;
2058 NegateAfterR = false;
2059 NegateAfterAll = false;
2060 }
2061
2062 // Emit sub-trees.
2063 AArch64CC::CondCode RHSCC;
2064 SDValue CmpR = emitConjunctionRec(DAG, RHS, RHSCC, NegateR, CCOp, Predicate);
2065 if (NegateAfterR)
2066 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
2067 SDValue CmpL = emitConjunctionRec(DAG, LHS, OutCC, NegateL, CmpR, RHSCC);
2068 if (NegateAfterAll)
2069 OutCC = AArch64CC::getInvertedCondCode(OutCC);
2070 return CmpL;
2071}
2072
2073/// Emit expression as a conjunction (a series of CCMP/CFCMP ops).
2074/// In some cases this is even possible with OR operations in the expression.
2075/// See \ref AArch64CCMP.
2076/// \see emitConjunctionRec().
2077static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
2078 AArch64CC::CondCode &OutCC) {
2079 bool DummyCanNegate;
2080 bool DummyMustBeFirst;
2081 if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
2082 return SDValue();
2083
2084 return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
2085}
2086
2087/// @}
2088
2089/// Returns how profitable it is to fold a comparison's operand's shift and/or
2090/// extension operations.
2091static unsigned getCmpOperandFoldingProfit(SDValue Op) {
2092 auto isSupportedExtend = [&](SDValue V) {
2093 if (V.getOpcode() == ISD::SIGN_EXTEND_INREG)
2094 return true;
2095
2096 if (V.getOpcode() == ISD::AND)
2097 if (ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
2098 uint64_t Mask = MaskCst->getZExtValue();
2099 return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
2100 }
2101
2102 return false;
2103 };
2104
2105 if (!Op.hasOneUse())
2106 return 0;
2107
2108 if (isSupportedExtend(Op))
2109 return 1;
2110
2111 unsigned Opc = Op.getOpcode();
2112 if (Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA)
2113 if (ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
2114 uint64_t Shift = ShiftCst->getZExtValue();
2115 if (isSupportedExtend(Op.getOperand(0)))
2116 return (Shift <= 4) ? 2 : 1;
2117 EVT VT = Op.getValueType();
2118 if ((VT == MVT::i32 && Shift <= 31) || (VT == MVT::i64 && Shift <= 63))
2119 return 1;
2120 }
2121
2122 return 0;
2123}
2124
2125static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
2126 SDValue &AArch64cc, SelectionDAG &DAG,
2127 const SDLoc &dl) {
2128 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
2129 EVT VT = RHS.getValueType();
2130 uint64_t C = RHSC->getZExtValue();
2131 if (!isLegalArithImmed(C)) {
2132 // Constant does not fit, try adjusting it by one?
2133 switch (CC) {
2134 default:
2135 break;
2136 case ISD::SETLT:
2137 case ISD::SETGE:
2138 if ((VT == MVT::i32 && C != 0x80000000 &&
2139 isLegalArithImmed((uint32_t)(C - 1))) ||
2140 (VT == MVT::i64 && C != 0x80000000ULL &&
2141 isLegalArithImmed(C - 1ULL))) {
2142 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
2143 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2144 RHS = DAG.getConstant(C, dl, VT);
2145 }
2146 break;
2147 case ISD::SETULT:
2148 case ISD::SETUGE:
2149 if ((VT == MVT::i32 && C != 0 &&
2150 isLegalArithImmed((uint32_t)(C - 1))) ||
2151 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
2152 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
2153 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
2154 RHS = DAG.getConstant(C, dl, VT);
2155 }
2156 break;
2157 case ISD::SETLE:
2158 case ISD::SETGT:
2159        if ((VT == MVT::i32 && C != INT32_MAX &&
2160 isLegalArithImmed((uint32_t)(C + 1))) ||
2161            (VT == MVT::i64 && C != INT64_MAX &&
2162 isLegalArithImmed(C + 1ULL))) {
2163 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
2164 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2165 RHS = DAG.getConstant(C, dl, VT);
2166 }
2167 break;
2168 case ISD::SETULE:
2169 case ISD::SETUGT:
2170        if ((VT == MVT::i32 && C != UINT32_MAX &&
2171 isLegalArithImmed((uint32_t)(C + 1))) ||
2172            (VT == MVT::i64 && C != UINT64_MAX &&
2173 isLegalArithImmed(C + 1ULL))) {
2174 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2175 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
2176 RHS = DAG.getConstant(C, dl, VT);
2177 }
2178 break;
2179 }
2180 }
2181 }
2182
2183 // Comparisons are canonicalized so that the RHS operand is simpler than the
2184 // LHS one, the extreme case being when RHS is an immediate. However, AArch64
2185 // can fold some shift+extend operations on the RHS operand, so swap the
2186 // operands if that can be done.
2187 //
2188 // For example:
2189 // lsl w13, w11, #1
2190 // cmp w13, w12
2191 // can be turned into:
2192 // cmp w12, w11, lsl #1
2193 if (!isa<ConstantSDNode>(RHS) ||
2194 !isLegalArithImmed(cast<ConstantSDNode>(RHS)->getZExtValue())) {
2195 SDValue TheLHS = isCMN(LHS, CC) ? LHS.getOperand(1) : LHS;
2196
2197 if (getCmpOperandFoldingProfit(TheLHS) > getCmpOperandFoldingProfit(RHS)) {
2198 std::swap(LHS, RHS);
2199 CC = ISD::getSetCCSwappedOperands(CC);
2200 }
2201 }
2202
2203 SDValue Cmp;
2204 AArch64CC::CondCode AArch64CC;
2205 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
2206 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
2207
2208 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
2209 // For the i8 operand, the largest immediate is 255, so this can be easily
2210 // encoded in the compare instruction. For the i16 operand, however, the
2211 // largest immediate cannot be encoded in the compare.
2212 // Therefore, use a sign extending load and cmn to avoid materializing the
2213 // -1 constant. For example,
2214 // movz w1, #65535
2215 // ldrh w0, [x0, #0]
2216 // cmp w0, w1
2217 // >
2218 // ldrsh w0, [x0, #0]
2219 // cmn w0, #1
2220    // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
2221 // if and only if (sext LHS) == (sext RHS). The checks are in place to
2222 // ensure both the LHS and RHS are truly zero extended and to make sure the
2223 // transformation is profitable.
2224 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
2225 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
2226 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
2227 LHS.getNode()->hasNUsesOfValue(1, 0)) {
2228 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
2229 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
2230 SDValue SExt =
2231 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
2232 DAG.getValueType(MVT::i16));
2233 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
2234 RHS.getValueType()),
2235 CC, dl, DAG);
2236 AArch64CC = changeIntCCToAArch64CC(CC);
2237 }
2238 }
2239
2240 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
2241 if ((Cmp = emitConjunction(DAG, LHS, AArch64CC))) {
2242 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
2243 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
2244 }
2245 }
2246 }
2247
2248 if (!Cmp) {
2249 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
2250 AArch64CC = changeIntCCToAArch64CC(CC);
2251 }
2252 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
2253 return Cmp;
2254}
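// ---- Illustrative sketch (not part of the original LLVM source) ----
// Example of the "adjust the constant by one" trick above, under the same
// encoding rule as isLegalArithImmed(): "x < 4097" uses SETLT with 4097,
// which is not encodable, but it is equivalent to "x <= 4096" (SETLE with
// 4096 == 1 << 12, which is encodable), so the lowering can flip the
// condition instead of materializing the constant in a register. The helper
// name is hypothetical.
#include <cstdint>
static bool sketchLtViaLe(int32_t X) {
  bool Lt = X < 4097;   // immediate not encodable
  bool Le = X <= 4096;  // semantically identical, immediate encodable
  return Lt == Le;      // holds for every X
}
// ---------------------------------------------------------------------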
2255
2256static std::pair<SDValue, SDValue>
2257getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
2258  assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
2259         "Unsupported value type");
2260 SDValue Value, Overflow;
2261 SDLoc DL(Op);
2262 SDValue LHS = Op.getOperand(0);
2263 SDValue RHS = Op.getOperand(1);
2264 unsigned Opc = 0;
2265 switch (Op.getOpcode()) {
2266 default:
2267    llvm_unreachable("Unknown overflow instruction!");
2268 case ISD::SADDO:
2269 Opc = AArch64ISD::ADDS;
2270 CC = AArch64CC::VS;
2271 break;
2272 case ISD::UADDO:
2273 Opc = AArch64ISD::ADDS;
2274 CC = AArch64CC::HS;
2275 break;
2276 case ISD::SSUBO:
2277 Opc = AArch64ISD::SUBS;
2278 CC = AArch64CC::VS;
2279 break;
2280 case ISD::USUBO:
2281 Opc = AArch64ISD::SUBS;
2282 CC = AArch64CC::LO;
2283 break;
2284 // Multiply needs a little bit extra work.
2285 case ISD::SMULO:
2286 case ISD::UMULO: {
2287 CC = AArch64CC::NE;
2288 bool IsSigned = Op.getOpcode() == ISD::SMULO;
2289 if (Op.getValueType() == MVT::i32) {
2290 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2291 // For a 32 bit multiply with overflow check we want the instruction
2292 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
2293 // need to generate the following pattern:
2294 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
2295 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
2296 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
2297 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2298 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
2299 DAG.getConstant(0, DL, MVT::i64));
2300 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
2301 // operation. We need to clear out the upper 32 bits, because we used a
2302 // widening multiply that wrote all 64 bits. In the end this should be a
2303 // noop.
2304 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
2305 if (IsSigned) {
2306 // The signed overflow check requires more than just a simple check for
2307 // any bit set in the upper 32 bits of the result. These bits could be
2308        // just the sign bits of a negative number. To perform the overflow
2309        // check we arithmetically shift the lower 32 bits of the result right
2310        // by 31 and compare them with the upper 32 bits.
2311 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
2312 DAG.getConstant(32, DL, MVT::i64));
2313 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
2314 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
2315 DAG.getConstant(31, DL, MVT::i64));
2316 // It is important that LowerBits is last, otherwise the arithmetic
2317 // shift will not be folded into the compare (SUBS).
2318 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
2319 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2320 .getValue(1);
2321 } else {
2322 // The overflow check for unsigned multiply is easy. We only need to
2323 // check if any of the upper 32 bits are set. This can be done with a
2324 // CMP (shifted register). For that we need to generate the following
2325 // pattern:
2326 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
2327 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
2328 DAG.getConstant(32, DL, MVT::i64));
2329 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2330 Overflow =
2331 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2332 DAG.getConstant(0, DL, MVT::i64),
2333 UpperBits).getValue(1);
2334 }
2335 break;
2336 }
2337    assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
2338 // For the 64 bit multiply
2339 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
2340 if (IsSigned) {
2341 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
2342 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
2343 DAG.getConstant(63, DL, MVT::i64));
2344 // It is important that LowerBits is last, otherwise the arithmetic
2345 // shift will not be folded into the compare (SUBS).
2346 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2347 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
2348 .getValue(1);
2349 } else {
2350 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
2351 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
2352 Overflow =
2353 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
2354 DAG.getConstant(0, DL, MVT::i64),
2355 UpperBits).getValue(1);
2356 }
2357 break;
2358 }
2359 } // switch (...)
2360
2361 if (Opc) {
2362 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
2363
2364 // Emit the AArch64 operation with overflow check.
2365 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
2366 Overflow = Value.getValue(1);
2367 }
2368 return std::make_pair(Value, Overflow);
2369}
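// ---- Illustrative sketch (not part of the original LLVM source) ----
// The 32-bit SMULO/UMULO handling above boils down to: do the multiply in
// 64 bits, then inspect the upper half. For unsigned overflow, any set bit
// in the upper 32 bits is enough; for signed overflow, the upper 32 bits
// must equal the sign-extension of the lower 32 bits (the low half shifted
// right arithmetically by 31). A standalone model of that arithmetic, with
// hypothetical names (assumes arithmetic right shift of signed values, which
// is universal in practice and guaranteed since C++20):
#include <cstdint>
static bool sketchUMulOverflow32(uint32_t A, uint32_t B) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B;
  return (Wide >> 32) != 0;            // any upper bit set => overflow
}
static bool sketchSMulOverflow32(int32_t A, int32_t B) {
  int64_t Wide = (int64_t)A * (int64_t)B;
  int32_t Lo = (int32_t)Wide;          // truncated 32-bit result
  int32_t Hi = (int32_t)(Wide >> 32);  // upper 32 bits
  return Hi != (Lo >> 31);             // upper half must be all sign bits
}
// sketchSMulOverflow32(0x40000000, 2) -> true, sketchSMulOverflow32(-2, 3) -> false.
// ---------------------------------------------------------------------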
2370
2371SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
2372 RTLIB::Libcall Call) const {
2373 bool IsStrict = Op->isStrictFPOpcode();
2374 unsigned Offset = IsStrict ? 1 : 0;
2375 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
2376 SmallVector<SDValue, 2> Ops(Op->op_begin() + Offset, Op->op_end());
2377 MakeLibCallOptions CallOptions;
2378 SDValue Result;
2379 SDLoc dl(Op);
2380 std::tie(Result, Chain) = makeLibCall(DAG, Call, Op.getValueType(), Ops,
2381 CallOptions, dl, Chain);
2382 return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
2383}
2384
2385static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2386 SDValue Sel = Op.getOperand(0);
2387 SDValue Other = Op.getOperand(1);
2388 SDLoc dl(Sel);
2389
2390 // If the operand is an overflow checking operation, invert the condition
2391 // code and kill the Not operation. I.e., transform:
2392 // (xor (overflow_op_bool, 1))
2393 // -->
2394 // (csel 1, 0, invert(cc), overflow_op_bool)
2395 // ... which later gets transformed to just a cset instruction with an
2396 // inverted condition code, rather than a cset + eor sequence.
2397 if (isOneConstant(Other) && ISD::isOverflowIntrOpRes(Sel)) {
2398 // Only lower legal XALUO ops.
2399 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2400 return SDValue();
2401
2402 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2403 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2404 AArch64CC::CondCode CC;
2405 SDValue Value, Overflow;
2406 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2407 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2408 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2409 CCVal, Overflow);
2410 }
2411 // If neither operand is a SELECT_CC, give up.
2412 if (Sel.getOpcode() != ISD::SELECT_CC)
2413 std::swap(Sel, Other);
2414 if (Sel.getOpcode() != ISD::SELECT_CC)
2415 return Op;
2416
2417 // The folding we want to perform is:
2418 // (xor x, (select_cc a, b, cc, 0, -1) )
2419 // -->
2420 // (csel x, (xor x, -1), cc ...)
2421 //
2422 // The latter will get matched to a CSINV instruction.
2423
2424 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2425 SDValue LHS = Sel.getOperand(0);
2426 SDValue RHS = Sel.getOperand(1);
2427 SDValue TVal = Sel.getOperand(2);
2428 SDValue FVal = Sel.getOperand(3);
2429
2430 // FIXME: This could be generalized to non-integer comparisons.
2431 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2432 return Op;
2433
2434 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2435 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2436
2437 // The values aren't constants, this isn't the pattern we're looking for.
2438 if (!CFVal || !CTVal)
2439 return Op;
2440
2441 // We can commute the SELECT_CC by inverting the condition. This
2442 // might be needed to make this fit into a CSINV pattern.
2443 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2444 std::swap(TVal, FVal);
2445 std::swap(CTVal, CFVal);
2446 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
2447 }
2448
2449 // If the constants line up, perform the transform!
2450 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2451 SDValue CCVal;
2452 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2453
2454 FVal = Other;
2455 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2456 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2457
2458 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2459 CCVal, Cmp);
2460 }
2461
2462 return Op;
2463}
2464
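// Illustrative sketch, not part of AArch64ISelLowering.cpp: scalar versions
// of the two XOR folds performed in LowerXOR above. XOR-ing an overflow flag
// with 1 is the same as materialising the inverted condition (a single CSET),
// and XOR-ing a value with a 0/-1 select is the same as conditionally
// inverting it (a single CSINV). The function names are hypothetical.
#include <cstdint>

static uint32_t notOverflow(bool overflow) {
  // (xor overflow, 1)  ==  csel 1, 0, invert(cc)
  return overflow ? 0u : 1u;
}

static uint64_t xorWithSelect(uint64_t x, bool cc) {
  // (xor x, (select_cc ..., cc, 0, -1))  ==  (csel x, ~x, cc)  ->  CSINV
  uint64_t mask = cc ? 0u : ~0ull; // the select_cc value
  return x ^ mask;                 // equals cc ? x : ~x
}
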
2465static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2466 EVT VT = Op.getValueType();
2467
2468 // Let legalize expand this if it isn't a legal type yet.
2469 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2470 return SDValue();
2471
2472 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2473
2474 unsigned Opc;
2475 bool ExtraOp = false;
2476 switch (Op.getOpcode()) {
2477 default:
2478 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2478)
;
2479 case ISD::ADDC:
2480 Opc = AArch64ISD::ADDS;
2481 break;
2482 case ISD::SUBC:
2483 Opc = AArch64ISD::SUBS;
2484 break;
2485 case ISD::ADDE:
2486 Opc = AArch64ISD::ADCS;
2487 ExtraOp = true;
2488 break;
2489 case ISD::SUBE:
2490 Opc = AArch64ISD::SBCS;
2491 ExtraOp = true;
2492 break;
2493 }
2494
2495 if (!ExtraOp)
2496 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2497 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2498 Op.getOperand(2));
2499}
2500
2501static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2502 // Let legalize expand this if it isn't a legal type yet.
2503 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2504 return SDValue();
2505
2506 SDLoc dl(Op);
2507 AArch64CC::CondCode CC;
2508 // The actual operation that sets the overflow or carry flag.
2509 SDValue Value, Overflow;
2510 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2511
2512 // We use 0 and 1 as false and true values.
2513 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2514 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2515
2516 // We use an inverted condition, because the conditional select is inverted
2517 // too. This will allow it to be selected to a single instruction:
2518 // CSINC Wd, WZR, WZR, invert(cond).
2519 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2520 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2521 CCVal, Overflow);
2522
2523 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2524 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2525}
2526
2527// Prefetch operands are:
2528// 1: Address to prefetch
2529// 2: bool isWrite
2530// 3: int locality (0 = no locality ... 3 = extreme locality)
2531// 4: bool isDataCache
2532static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2533 SDLoc DL(Op);
2534 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2535 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2536 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2537
2538 bool IsStream = !Locality;
2539 // When the locality number is set
2540 if (Locality) {
2541 // The front-end should have filtered out the out-of-range values
2542 assert(Locality <= 3 && "Prefetch locality out-of-range");
2543 // The locality degree is the opposite of the cache speed.
2544 // Put the number the other way around.
2545 // The encoding starts at 0 for level 1
2546 Locality = 3 - Locality;
2547 }
2548
2549 // Build the mask value encoding the expected behavior.
2550 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2551 (!IsData << 3) | // IsDataCache bit
2552 (Locality << 1) | // Cache level bits
2553 (unsigned)IsStream; // Stream bit
2554 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2555 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2556}
2557
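// Illustrative sketch, not part of AArch64ISelLowering.cpp: the PRFM operand
// encoding built above, shown as a standalone helper. For a read of the data
// cache with locality 3 it yields 0 (PLDL1KEEP); locality 0 selects the
// streaming hint (PLDL1STRM == 1). The helper name is hypothetical.
static unsigned encodePrfOp(bool isWrite, unsigned locality, bool isData) {
  bool isStream = (locality == 0);
  if (locality)                 // locality 1..3 maps to cache level 2..0
    locality = 3 - locality;
  return (isWrite << 4) |       // bit 4: load (0) / store (1)
         (!isData << 3) |       // bit 3: data (0) / instruction (1) cache
         (locality << 1) |      // bits 2:1: target cache level
         (unsigned)isStream;    // bit 0: keep (0) / stream (1)
}
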
2558SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2559 SelectionDAG &DAG) const {
2560 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2561
2562 RTLIB::Libcall LC;
2563 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2564
2565 return LowerF128Call(Op, DAG, LC);
2566}
2567
2568SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2569 SelectionDAG &DAG) const {
2570 bool IsStrict = Op->isStrictFPOpcode();
2571 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
2572 if (SrcVal.getValueType() != MVT::f128) {
2573 // It's legal except when f128 is involved
2574 return Op;
2575 }
2576
2577 RTLIB::Libcall LC;
2578 LC = RTLIB::getFPROUND(SrcVal.getValueType(), Op.getValueType());
2579
2580 // FP_ROUND node has a second operand indicating whether it is known to be
2581 // precise. That doesn't take part in the LibCall so we can't directly use
2582 // LowerF128Call.
2583 MakeLibCallOptions CallOptions;
2584 SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
2585 SDValue Result;
2586 SDLoc dl(Op);
2587 std::tie(Result, Chain) = makeLibCall(DAG, LC, Op.getValueType(), SrcVal,
2588 CallOptions, dl, Chain);
2589 return IsStrict ? DAG.getMergeValues({Result, Chain}, dl) : Result;
2590}
2591
2592SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
2593 SelectionDAG &DAG) const {
2594 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2595 // Any additional optimization in this function should be recorded
2596 // in the cost tables.
2597 EVT InVT = Op.getOperand(0).getValueType();
2598 EVT VT = Op.getValueType();
2599 unsigned NumElts = InVT.getVectorNumElements();
2600
2601 // f16 conversions are promoted to f32 when full fp16 is not supported.
2602 if (InVT.getVectorElementType() == MVT::f16 &&
2603 !Subtarget->hasFullFP16()) {
2604 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2605 SDLoc dl(Op);
2606 return DAG.getNode(
2607 Op.getOpcode(), dl, Op.getValueType(),
2608 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2609 }
2610
2611 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2612 SDLoc dl(Op);
2613 SDValue Cv =
2614 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2615 Op.getOperand(0));
2616 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2617 }
2618
2619 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2620 SDLoc dl(Op);
2621 MVT ExtVT =
2622 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2623 VT.getVectorNumElements());
2624 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2625 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2626 }
2627
2628 // Type changing conversions are illegal.
2629 return Op;
2630}
2631
2632SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2633 SelectionDAG &DAG) const {
2634 bool IsStrict = Op->isStrictFPOpcode();
2635 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
2636
2637 if (SrcVal.getValueType().isVector())
2638 return LowerVectorFP_TO_INT(Op, DAG);
2639
2640 // f16 conversions are promoted to f32 when full fp16 is not supported.
2641 if (SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
2642 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
2643 SDLoc dl(Op);
2644 return DAG.getNode(
2645 Op.getOpcode(), dl, Op.getValueType(),
2646 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
2647 }
2648
2649 if (SrcVal.getValueType() != MVT::f128) {
2650 // It's legal except when f128 is involved
2651 return Op;
2652 }
2653
2654 RTLIB::Libcall LC;
2655 if (Op.getOpcode() == ISD::FP_TO_SINT ||
2656 Op.getOpcode() == ISD::STRICT_FP_TO_SINT)
2657 LC = RTLIB::getFPTOSINT(SrcVal.getValueType(), Op.getValueType());
2658 else
2659 LC = RTLIB::getFPTOUINT(SrcVal.getValueType(), Op.getValueType());
2660
2661 return LowerF128Call(Op, DAG, LC);
2662}
2663
2664static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2665 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2666 // Any additional optimization in this function should be recorded
2667 // in the cost tables.
2668 EVT VT = Op.getValueType();
2669 SDLoc dl(Op);
2670 SDValue In = Op.getOperand(0);
2671 EVT InVT = In.getValueType();
2672
2673 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2674 MVT CastVT =
2675 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2676 InVT.getVectorNumElements());
2677 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2678 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2679 }
2680
2681 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2682 unsigned CastOpc =
2683 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2684 EVT CastVT = VT.changeVectorElementTypeToInteger();
2685 In = DAG.getNode(CastOpc, dl, CastVT, In);
2686 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2687 }
2688
2689 return Op;
2690}
2691
2692SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2693 SelectionDAG &DAG) const {
2694 if (Op.getValueType().isVector())
2695 return LowerVectorINT_TO_FP(Op, DAG);
2696
2697 bool IsStrict = Op->isStrictFPOpcode();
2698 SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
2699
2700 // f16 conversions are promoted to f32 when full fp16 is not supported.
2701 if (Op.getValueType() == MVT::f16 &&
2702 !Subtarget->hasFullFP16()) {
2703 assert(!IsStrict && "Lowering of strict fp16 not yet implemented");
2704 SDLoc dl(Op);
2705 return DAG.getNode(
2706 ISD::FP_ROUND, dl, MVT::f16,
2707 DAG.getNode(Op.getOpcode(), dl, MVT::f32, SrcVal),
2708 DAG.getIntPtrConstant(0, dl));
2709 }
2710
2711 // i128 conversions are libcalls.
2712 if (SrcVal.getValueType() == MVT::i128)
2713 return SDValue();
2714
2715 // Other conversions are legal, unless it's to the completely software-based
2716 // fp128.
2717 if (Op.getValueType() != MVT::f128)
2718 return Op;
2719
2720 RTLIB::Libcall LC;
2721 if (Op.getOpcode() == ISD::SINT_TO_FP ||
2722 Op.getOpcode() == ISD::STRICT_SINT_TO_FP)
2723 LC = RTLIB::getSINTTOFP(SrcVal.getValueType(), Op.getValueType());
2724 else
2725 LC = RTLIB::getUINTTOFP(SrcVal.getValueType(), Op.getValueType());
2726
2727 return LowerF128Call(Op, DAG, LC);
2728}
2729
2730SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2731 SelectionDAG &DAG) const {
2732 // For iOS, we want to call an alternative entry point: __sincos_stret,
2733 // which returns the values in two S / D registers.
2734 SDLoc dl(Op);
2735 SDValue Arg = Op.getOperand(0);
2736 EVT ArgVT = Arg.getValueType();
2737 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2738
2739 ArgListTy Args;
2740 ArgListEntry Entry;
2741
2742 Entry.Node = Arg;
2743 Entry.Ty = ArgTy;
2744 Entry.IsSExt = false;
2745 Entry.IsZExt = false;
2746 Args.push_back(Entry);
2747
2748 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2749 : RTLIB::SINCOS_STRET_F32;
2750 const char *LibcallName = getLibcallName(LC);
2751 SDValue Callee =
2752 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2753
2754 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2755 TargetLowering::CallLoweringInfo CLI(DAG);
2756 CLI.setDebugLoc(dl)
2757 .setChain(DAG.getEntryNode())
2758 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2759
2760 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2761 return CallResult.first;
2762}
2763
2764static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2765 if (Op.getValueType() != MVT::f16)
2766 return SDValue();
2767
2768 assert(Op.getOperand(0).getValueType() == MVT::i16);
2769 SDLoc DL(Op);
2770
2771 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2772 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2773 return SDValue(
2774 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2775 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2776 0);
2777}
2778
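// Illustrative sketch, not part of AArch64ISelLowering.cpp: the i16 -> f16
// bitcast lowered above is a pure reinterpretation. The i16 is widened into a
// 32-bit GPR, moved into an FP register as f32, and the f16 is then read from
// the low 16 bits of that register (the hsub subregister); the bit pattern is
// never converted. The hypothetical round-trip below demonstrates that.
#include <cstdint>
#include <cstring>

static uint16_t f16BitcastRoundTrip(uint16_t bits) {
  uint32_t widened = bits;                       // ANY_EXTEND i16 -> i32
  float asF32;                                   // BITCAST i32 -> f32
  std::memcpy(&asF32, &widened, sizeof(asF32));
  uint32_t back;
  std::memcpy(&back, &asF32, sizeof(back));
  return (uint16_t)back;                         // EXTRACT_SUBREG hsub
}
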
2779static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2780 if (OrigVT.getSizeInBits() >= 64)
2781 return OrigVT;
2782
2783 assert(OrigVT.isSimple() && "Expecting a simple value type");
2784
2785 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2786 switch (OrigSimpleTy) {
2787 default: llvm_unreachable("Unexpected Vector Type");
2788 case MVT::v2i8:
2789 case MVT::v2i16:
2790 return MVT::v2i32;
2791 case MVT::v4i8:
2792 return MVT::v4i16;
2793 }
2794}
2795
2796static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2797 const EVT &OrigTy,
2798 const EVT &ExtTy,
2799 unsigned ExtOpcode) {
2800 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2801 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2802 // 64-bits we need to insert a new extension so that it will be 64-bits.
2803 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2804 if (OrigTy.getSizeInBits() >= 64)
2805 return N;
2806
2807 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2808 EVT NewVT = getExtensionTo64Bits(OrigTy);
2809
2810 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2811}
2812
2813static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2814 bool isSigned) {
2815 EVT VT = N->getValueType(0);
2816
2817 if (N->getOpcode() != ISD::BUILD_VECTOR)
2818 return false;
2819
2820 for (const SDValue &Elt : N->op_values()) {
2821 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2822 unsigned EltSize = VT.getScalarSizeInBits();
2823 unsigned HalfSize = EltSize / 2;
2824 if (isSigned) {
2825 if (!isIntN(HalfSize, C->getSExtValue()))
2826 return false;
2827 } else {
2828 if (!isUIntN(HalfSize, C->getZExtValue()))
2829 return false;
2830 }
2831 continue;
2832 }
2833 return false;
2834 }
2835
2836 return true;
2837}
2838
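// Illustrative sketch, not part of AArch64ISelLowering.cpp: the per-element
// test applied by isExtendedBUILD_VECTOR above. A constant lane of an i32
// vector counts as "already extended" when it fits in 16 bits (signed or
// unsigned), because the vector could then be rebuilt from narrower elements
// and consumed by SMULL/UMULL. The helper name is hypothetical.
#include <cstdint>

static bool fitsInHalfWidth(int64_t v, unsigned eltBits, bool isSigned) {
  unsigned halfBits = eltBits / 2;                 // e.g. 16 for an i32 lane
  if (isSigned) {                                  // mirrors isIntN(halfBits, v)
    int64_t hi = (int64_t(1) << (halfBits - 1)) - 1;
    int64_t lo = -hi - 1;
    return v >= lo && v <= hi;
  }
  return (uint64_t)v < (uint64_t(1) << halfBits);  // mirrors isUIntN(halfBits, v)
}
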
2839static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2840 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2841 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2842 N->getOperand(0)->getValueType(0),
2843 N->getValueType(0),
2844 N->getOpcode());
2845
2846 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2847 EVT VT = N->getValueType(0);
2848 SDLoc dl(N);
2849 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2850 unsigned NumElts = VT.getVectorNumElements();
2851 MVT TruncVT = MVT::getIntegerVT(EltSize);
2852 SmallVector<SDValue, 8> Ops;
2853 for (unsigned i = 0; i != NumElts; ++i) {
2854 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2855 const APInt &CInt = C->getAPIntValue();
2856 // Element types smaller than 32 bits are not legal, so use i32 elements.
2857 // The values are implicitly truncated so sext vs. zext doesn't matter.
2858 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2859 }
2860 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2861}
2862
2863static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2864 return N->getOpcode() == ISD::SIGN_EXTEND ||
2865 isExtendedBUILD_VECTOR(N, DAG, true);
2866}
2867
2868static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2869 return N->getOpcode() == ISD::ZERO_EXTEND ||
2870 isExtendedBUILD_VECTOR(N, DAG, false);
2871}
2872
2873static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2874 unsigned Opcode = N->getOpcode();
2875 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2876 SDNode *N0 = N->getOperand(0).getNode();
2877 SDNode *N1 = N->getOperand(1).getNode();
2878 return N0->hasOneUse() && N1->hasOneUse() &&
2879 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2880 }
2881 return false;
2882}
2883
2884static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2885 unsigned Opcode = N->getOpcode();
2886 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2887 SDNode *N0 = N->getOperand(0).getNode();
2888 SDNode *N1 = N->getOperand(1).getNode();
2889 return N0->hasOneUse() && N1->hasOneUse() &&
2890 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2891 }
2892 return false;
2893}
2894
2895SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2896 SelectionDAG &DAG) const {
2897 // The rounding mode is in bits 23:22 of the FPSCR.
2898 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
2899 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
2900 // so that the shift + and get folded into a bitfield extract.
2901 SDLoc dl(Op);
2902
2903 SDValue Chain = Op.getOperand(0);
2904 SDValue FPCR_64 = DAG.getNode(
2905 ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
2906 {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
2907 Chain = FPCR_64.getValue(1);
2908 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2909 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2910 DAG.getConstant(1U << 22, dl, MVT::i32));
2911 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2912 DAG.getConstant(22, dl, MVT::i32));
2913 SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2914 DAG.getConstant(3, dl, MVT::i32));
2915 return DAG.getMergeValues({AND, Chain}, dl);
2916}
2917
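// Illustrative sketch, not part of AArch64ISelLowering.cpp: the arithmetic
// used above. FPCR.RMode (bits 23:22) encodes 0=RN, 1=RP, 2=RM, 3=RZ, while
// FLT_ROUNDS wants 1, 2, 3, 0 for the same modes. Adding 1 << 22 increments
// the two-bit field modulo 4 (the mask discards any carry), which is exactly
// the rotation 0->1, 1->2, 2->3, 3->0. The helper name is hypothetical.
#include <cstdint>

static unsigned fltRoundsFromFPCR(uint32_t fpcr) {
  return ((fpcr + (1u << 22)) >> 22) & 3;
}
// e.g. fltRoundsFromFPCR(0)        == 1 (round to nearest)
//      fltRoundsFromFPCR(3u << 22) == 0 (round toward zero)
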
2918static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2919 // Multiplications are only custom-lowered for 128-bit vectors so that
2920 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2921 EVT VT = Op.getValueType();
2922 assert(VT.is128BitVector() && VT.isInteger() &&
2923        "unexpected type for custom-lowering ISD::MUL");
2924 SDNode *N0 = Op.getOperand(0).getNode();
2925 SDNode *N1 = Op.getOperand(1).getNode();
2926 unsigned NewOpc = 0;
2927 bool isMLA = false;
2928 bool isN0SExt = isSignExtended(N0, DAG);
2929 bool isN1SExt = isSignExtended(N1, DAG);
2930 if (isN0SExt && isN1SExt)
2931 NewOpc = AArch64ISD::SMULL;
2932 else {
2933 bool isN0ZExt = isZeroExtended(N0, DAG);
2934 bool isN1ZExt = isZeroExtended(N1, DAG);
2935 if (isN0ZExt && isN1ZExt)
2936 NewOpc = AArch64ISD::UMULL;
2937 else if (isN1SExt || isN1ZExt) {
2938 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2939 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2940 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2941 NewOpc = AArch64ISD::SMULL;
2942 isMLA = true;
2943 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2944 NewOpc = AArch64ISD::UMULL;
2945 isMLA = true;
2946 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2947 std::swap(N0, N1);
2948 NewOpc = AArch64ISD::UMULL;
2949 isMLA = true;
2950 }
2951 }
2952
2953 if (!NewOpc) {
2954 if (VT == MVT::v2i64)
2955 // Fall through to expand this. It is not legal.
2956 return SDValue();
2957 else
2958 // Other vector multiplications are legal.
2959 return Op;
2960 }
2961 }
2962
2963 // Legalize to a S/UMULL instruction
2964 SDLoc DL(Op);
2965 SDValue Op0;
2966 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2967 if (!isMLA) {
2968 Op0 = skipExtensionForVectorMULL(N0, DAG);
2969 assert(Op0.getValueType().is64BitVector() &&
2970        Op1.getValueType().is64BitVector() &&
2971        "unexpected types for extended operands to VMULL");
2972 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2973 }
2974 // Optimizing (zext A + zext B) * C, to (S/UMULL A, C) + (S/UMULL B, C) during
2975 // isel lowering to take advantage of no-stall back to back s/umul + s/umla.
2976 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57
2977 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2978 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2979 EVT Op1VT = Op1.getValueType();
2980 return DAG.getNode(N0->getOpcode(), DL, VT,
2981 DAG.getNode(NewOpc, DL, VT,
2982 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2983 DAG.getNode(NewOpc, DL, VT,
2984 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2985}
2986
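// Illustrative sketch, not part of AArch64ISelLowering.cpp: the algebraic
// identity behind the isMLA path above, written per lane for sign-extended
// i16 inputs. Distributing the widening multiply over the extended add lets
// both products use SMULL and feed a multiply-accumulate, instead of needing
// an illegal full-width vector multiply. The helper name is hypothetical.
#include <cstdint>

static int64_t distributedWideningMul(int16_t a, int16_t b, int16_t c) {
  // (sext a + sext b) * sext c == (sext a * sext c) + (sext b * sext c)
  int64_t direct      = ((int64_t)a + (int64_t)b) * (int64_t)c;
  int64_t distributed = (int64_t)a * c + (int64_t)b * c;
  return distributed == direct ? distributed : -1; // the identity always holds
}
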
2987static inline SDValue getPTrue(SelectionDAG &DAG, SDLoc DL, EVT VT,
2988 int Pattern) {
2989 return DAG.getNode(AArch64ISD::PTRUE, DL, VT,
2990 DAG.getTargetConstant(Pattern, DL, MVT::i32));
2991}
2992
2993SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2994 SelectionDAG &DAG) const {
2995 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2996 SDLoc dl(Op);
2997 switch (IntNo) {
2998 default: return SDValue(); // Don't custom lower most intrinsics.
2999 case Intrinsic::thread_pointer: {
3000 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3001 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
3002 }
3003 case Intrinsic::aarch64_neon_abs: {
3004 EVT Ty = Op.getValueType();
3005 if (Ty == MVT::i64) {
3006 SDValue Result = DAG.getNode(ISD::BITCAST, dl, MVT::v1i64,
3007 Op.getOperand(1));
3008 Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
3009 return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
3010 } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
3011 return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
3012 } else {
3013 report_fatal_error("Unexpected type for AArch64 NEON intrinic");
3014 }
3015 }
3016 case Intrinsic::aarch64_neon_smax:
3017 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
3018 Op.getOperand(1), Op.getOperand(2));
3019 case Intrinsic::aarch64_neon_umax:
3020 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
3021 Op.getOperand(1), Op.getOperand(2));
3022 case Intrinsic::aarch64_neon_smin:
3023 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
3024 Op.getOperand(1), Op.getOperand(2));
3025 case Intrinsic::aarch64_neon_umin:
3026 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
3027 Op.getOperand(1), Op.getOperand(2));
3028
3029 case Intrinsic::aarch64_sve_sunpkhi:
3030 return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
3031 Op.getOperand(1));
3032 case Intrinsic::aarch64_sve_sunpklo:
3033 return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
3034 Op.getOperand(1));
3035 case Intrinsic::aarch64_sve_uunpkhi:
3036 return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
3037 Op.getOperand(1));
3038 case Intrinsic::aarch64_sve_uunpklo:
3039 return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
3040 Op.getOperand(1));
3041 case Intrinsic::aarch64_sve_clasta_n:
3042 return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
3043 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3044 case Intrinsic::aarch64_sve_clastb_n:
3045 return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
3046 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3047 case Intrinsic::aarch64_sve_lasta:
3048 return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
3049 Op.getOperand(1), Op.getOperand(2));
3050 case Intrinsic::aarch64_sve_lastb:
3051 return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
3052 Op.getOperand(1), Op.getOperand(2));
3053 case Intrinsic::aarch64_sve_rev:
3054 return DAG.getNode(AArch64ISD::REV, dl, Op.getValueType(),
3055 Op.getOperand(1));
3056 case Intrinsic::aarch64_sve_tbl:
3057 return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(),
3058 Op.getOperand(1), Op.getOperand(2));
3059 case Intrinsic::aarch64_sve_trn1:
3060 return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
3061 Op.getOperand(1), Op.getOperand(2));
3062 case Intrinsic::aarch64_sve_trn2:
3063 return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
3064 Op.getOperand(1), Op.getOperand(2));
3065 case Intrinsic::aarch64_sve_uzp1:
3066 return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
3067 Op.getOperand(1), Op.getOperand(2));
3068 case Intrinsic::aarch64_sve_uzp2:
3069 return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
3070 Op.getOperand(1), Op.getOperand(2));
3071 case Intrinsic::aarch64_sve_zip1:
3072 return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
3073 Op.getOperand(1), Op.getOperand(2));
3074 case Intrinsic::aarch64_sve_zip2:
3075 return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
3076 Op.getOperand(1), Op.getOperand(2));
3077 case Intrinsic::aarch64_sve_ptrue:
3078 return DAG.getNode(AArch64ISD::PTRUE, dl, Op.getValueType(),
3079 Op.getOperand(1));
3080 case Intrinsic::aarch64_sve_dupq_lane:
3081 return LowerDUPQLane(Op, DAG);
3082 case Intrinsic::aarch64_sve_convert_from_svbool:
3083 return DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, Op.getValueType(),
3084 Op.getOperand(1));
3085 case Intrinsic::aarch64_sve_convert_to_svbool: {
3086 EVT OutVT = Op.getValueType();
3087 EVT InVT = Op.getOperand(1).getValueType();
3088 // Return the operand if the cast isn't changing type,
3089 // i.e. <n x 16 x i1> -> <n x 16 x i1>
3090 if (InVT == OutVT)
3091 return Op.getOperand(1);
3092 // Otherwise, zero the newly introduced lanes.
3093 SDValue Reinterpret =
3094 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Op.getOperand(1));
3095 SDValue Mask = getPTrue(DAG, dl, InVT, AArch64SVEPredPattern::all);
3096 SDValue MaskReinterpret =
3097 DAG.getNode(AArch64ISD::REINTERPRET_CAST, dl, OutVT, Mask);
3098 return DAG.getNode(ISD::AND, dl, OutVT, Reinterpret, MaskReinterpret);
3099 }
3100
3101 case Intrinsic::aarch64_sve_insr: {
3102 SDValue Scalar = Op.getOperand(2);
3103 EVT ScalarTy = Scalar.getValueType();
3104 if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
3105 Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
3106
3107 return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
3108 Op.getOperand(1), Scalar);
3109 }
3110
3111 case Intrinsic::localaddress: {
3112 const auto &MF = DAG.getMachineFunction();
3113 const auto *RegInfo = Subtarget->getRegisterInfo();
3114 unsigned Reg = RegInfo->getLocalAddressRegister(MF);
3115 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
3116 Op.getSimpleValueType());
3117 }
3118
3119 case Intrinsic::eh_recoverfp: {
3120 // FIXME: This needs to be implemented to correctly handle highly aligned
3121 // stack objects. For now we simply return the incoming FP. Refer D53541
3122 // for more details.
3123 SDValue FnOp = Op.getOperand(1);
3124 SDValue IncomingFPOp = Op.getOperand(2);
3125 GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp);
3126 auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr);
3127 if (!Fn)
3128 report_fatal_error(
3129 "llvm.eh.recoverfp must take a function as the first argument");
3130 return IncomingFPOp;
3131 }
3132 }
3133}
3134
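// Illustrative sketch, not part of AArch64ISelLowering.cpp: why the
// convert_to_svbool case above ANDs with a PTRUE of the input type.
// Reinterpreting a narrower predicate as an all-lanes svbool leaves the newly
// visible lanes undefined, so they are cleared by masking with an all-true
// predicate of the source width reinterpreted into the destination type.
// A fixed-width bitmask analogue (names hypothetical):
#include <cstdint>

static uint16_t convertToWiderPredicate(uint16_t reinterpreted,
                                        uint16_t srcLaneMask) {
  // srcLaneMask plays the role of PTRUE over the input's lanes; lanes outside
  // it are forced to zero rather than left undefined.
  return reinterpreted & srcLaneMask;
}
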
3135bool AArch64TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
3136 return ExtVal.getValueType().isScalableVector();
3137}
3138
3139// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
3140static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
3141 EVT VT, EVT MemVT,
3142 SelectionDAG &DAG) {
3143 assert(VT.isVector() && "VT should be a vector type");
3144 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
3145
3146 SDValue Value = ST->getValue();
3147
3148 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and extracts
3149 // the word lane which represents the v4i8 subvector. It optimizes the store
3150 // to:
3151 //
3152 // xtn v0.8b, v0.8h
3153 // str s0, [x0]
3154
3155 SDValue Undef = DAG.getUNDEF(MVT::i16);
3156 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
3157 {Undef, Undef, Undef, Undef});
3158
3159 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
3160 Value, UndefVec);
3161 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
3162
3163 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
3164 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3165 Trunc, DAG.getConstant(0, DL, MVT::i64));
3166
3167 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
3168 ST->getBasePtr(), ST->getMemOperand());
3169}
3170
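// Illustrative sketch, not part of AArch64ISelLowering.cpp: the lane-by-lane
// effect of the v4i16 -> v4i8 truncating store lowered above. The emitted
// xtn + str s0 sequence writes the same four low bytes with one 32-bit store
// (little-endian lane order assumed). The helper name is hypothetical.
#include <cstdint>

static void truncStoreV4i16ToV4i8(const uint16_t lanes[4], uint8_t *dst) {
  for (int i = 0; i != 4; ++i)
    dst[i] = (uint8_t)lanes[i]; // keep only the low 8 bits of each lane
}
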
3171 // Custom lowering for any store, vector or scalar, default or truncating.
3172 // Currently we only custom lower truncating stores from vector v4i16 to
3173 // v4i8 and volatile stores of i128.
3174SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
3175 SelectionDAG &DAG) const {
3176 SDLoc Dl(Op);
3177 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
3178 assert (StoreNode && "Can only custom lower store nodes");
3179
3180 SDValue Value = StoreNode->getValue();
3181
3182 EVT VT = Value.getValueType();
3183 EVT MemVT = StoreNode->getMemoryVT();
3184
3185 if (VT.isVector()) {
3186 unsigned AS = StoreNode->getAddressSpace();
3187 unsigned Align = StoreNode->getAlignment();
3188 if (Align < MemVT.getStoreSize() &&
3189 !allowsMisalignedMemoryAccesses(MemVT, AS, Align,
3190 StoreNode->getMemOperand()->getFlags(),
3191 nullptr)) {
3192 return scalarizeVectorStore(StoreNode, DAG);
3193 }
3194
3195 if (StoreNode->isTruncatingStore()) {
3196 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
3197 }
3198 // 256 bit non-temporal stores can be lowered to STNP. Do this as part of
3199 // the custom lowering, as there are no un-paired non-temporal stores and
3200 // legalization will break up 256 bit inputs.
3201 if (StoreNode->isNonTemporal() && MemVT.getSizeInBits() == 256u &&
3202 MemVT.getVectorElementCount().Min % 2u == 0 &&
3203 ((MemVT.getScalarSizeInBits() == 8u ||
3204 MemVT.getScalarSizeInBits() == 16u ||
3205 MemVT.getScalarSizeInBits() == 32u ||
3206 MemVT.getScalarSizeInBits() == 64u))) {
3207 SDValue Lo =
3208 DAG.getNode(ISD::EXTRACT_SUBVECTOR, Dl,
3209 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
3210 StoreNode->getValue(), DAG.getConstant(0, Dl, MVT::i64));
3211 SDValue Hi = DAG.getNode(
3212 ISD::EXTRACT_SUBVECTOR, Dl,
3213 MemVT.getHalfNumVectorElementsVT(*DAG.getContext()),
3214 StoreNode->getValue(),
3215 DAG.getConstant(MemVT.getVectorElementCount().Min / 2, Dl, MVT::i64));
3216 SDValue Result = DAG.getMemIntrinsicNode(
3217 AArch64ISD::STNP, Dl, DAG.getVTList(MVT::Other),
3218 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
3219 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
3220 return Result;
3221 }
3222 } else if (MemVT == MVT::i128 && StoreNode->isVolatile()) {
3223 assert(StoreNode->getValue()->getValueType(0) == MVT::i128);
3224 SDValue Lo =
3225 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3226 DAG.getConstant(0, Dl, MVT::i64));
3227 SDValue Hi =
3228 DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i64, StoreNode->getValue(),
3229 DAG.getConstant(1, Dl, MVT::i64));
3230 SDValue Result = DAG.getMemIntrinsicNode(
3231 AArch64ISD::STP, Dl, DAG.getVTList(MVT::Other),
3232 {StoreNode->getChain(), Lo, Hi, StoreNode->getBasePtr()},
3233 StoreNode->getMemoryVT(), StoreNode->getMemOperand());
3234 return Result;
3235 }
3236
3237 return SDValue();
3238}
3239
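// Illustrative sketch, not part of AArch64ISelLowering.cpp: the splitting
// done above for volatile i128 stores. The value is decomposed into its low
// and high 64-bit halves, which one STP writes as a register pair. __int128
// is only a stand-in for the DAG's i128 type; little-endian layout assumed.
#include <cstdint>

static void storeI128AsPair(unsigned __int128 value, uint64_t *addr) {
  uint64_t lo = (uint64_t)value;          // EXTRACT_ELEMENT 0
  uint64_t hi = (uint64_t)(value >> 64);  // EXTRACT_ELEMENT 1
  addr[0] = lo;                           // stp xLo, xHi, [addr]
  addr[1] = hi;
}
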
3240SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
3241 SelectionDAG &DAG) const {
3242 LLVM_DEBUG(dbgs() << "Custom lowering: ");
3243 LLVM_DEBUG(Op.dump());
3244
3245 switch (Op.getOpcode()) {
3246 default:
3247 llvm_unreachable("unimplemented operand")::llvm::llvm_unreachable_internal("unimplemented operand", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3247)
;
3248 return SDValue();
3249 case ISD::BITCAST:
3250 return LowerBITCAST(Op, DAG);
3251 case ISD::GlobalAddress:
3252 return LowerGlobalAddress(Op, DAG);
3253 case ISD::GlobalTLSAddress:
3254 return LowerGlobalTLSAddress(Op, DAG);
3255 case ISD::SETCC:
3256 case ISD::STRICT_FSETCC:
3257 case ISD::STRICT_FSETCCS:
3258 return LowerSETCC(Op, DAG);
3259 case ISD::BR_CC:
3260 return LowerBR_CC(Op, DAG);
3261 case ISD::SELECT:
3262 return LowerSELECT(Op, DAG);
3263 case ISD::SELECT_CC:
3264 return LowerSELECT_CC(Op, DAG);
3265 case ISD::JumpTable:
3266 return LowerJumpTable(Op, DAG);
3267 case ISD::BR_JT:
3268 return LowerBR_JT(Op, DAG);
3269 case ISD::ConstantPool:
3270 return LowerConstantPool(Op, DAG);
3271 case ISD::BlockAddress:
3272 return LowerBlockAddress(Op, DAG);
3273 case ISD::VASTART:
3274 return LowerVASTART(Op, DAG);
3275 case ISD::VACOPY:
3276 return LowerVACOPY(Op, DAG);
3277 case ISD::VAARG:
3278 return LowerVAARG(Op, DAG);
3279 case ISD::ADDC:
3280 case ISD::ADDE:
3281 case ISD::SUBC:
3282 case ISD::SUBE:
3283 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
3284 case ISD::SADDO:
3285 case ISD::UADDO:
3286 case ISD::SSUBO:
3287 case ISD::USUBO:
3288 case ISD::SMULO:
3289 case ISD::UMULO:
3290 return LowerXALUO(Op, DAG);
3291 case ISD::FADD:
3292 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
3293 case ISD::FSUB:
3294 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
3295 case ISD::FMUL:
3296 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
3297 case ISD::FDIV:
3298 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
3299 case ISD::FP_ROUND:
3300 case ISD::STRICT_FP_ROUND:
3301 return LowerFP_ROUND(Op, DAG);
3302 case ISD::FP_EXTEND:
3303 return LowerFP_EXTEND(Op, DAG);
3304 case ISD::FRAMEADDR:
3305 return LowerFRAMEADDR(Op, DAG);
3306 case ISD::SPONENTRY:
3307 return LowerSPONENTRY(Op, DAG);
3308 case ISD::RETURNADDR:
3309 return LowerRETURNADDR(Op, DAG);
3310 case ISD::ADDROFRETURNADDR:
3311 return LowerADDROFRETURNADDR(Op, DAG);
3312 case ISD::INSERT_VECTOR_ELT:
3313 return LowerINSERT_VECTOR_ELT(Op, DAG);
3314 case ISD::EXTRACT_VECTOR_ELT:
3315 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
3316 case ISD::BUILD_VECTOR:
3317 return LowerBUILD_VECTOR(Op, DAG);
3318 case ISD::VECTOR_SHUFFLE:
3319 return LowerVECTOR_SHUFFLE(Op, DAG);
3320 case ISD::SPLAT_VECTOR:
3321 return LowerSPLAT_VECTOR(Op, DAG);
3322 case ISD::EXTRACT_SUBVECTOR:
3323 return LowerEXTRACT_SUBVECTOR(Op, DAG);
3324 case ISD::SRA:
3325 case ISD::SRL:
3326 case ISD::SHL:
3327 return LowerVectorSRA_SRL_SHL(Op, DAG);
3328 case ISD::SHL_PARTS:
3329 return LowerShiftLeftParts(Op, DAG);
3330 case ISD::SRL_PARTS:
3331 case ISD::SRA_PARTS:
3332 return LowerShiftRightParts(Op, DAG);
3333 case ISD::CTPOP:
3334 return LowerCTPOP(Op, DAG);
3335 case ISD::FCOPYSIGN:
3336 return LowerFCOPYSIGN(Op, DAG);
3337 case ISD::OR:
3338 return LowerVectorOR(Op, DAG);
3339 case ISD::XOR:
3340 return LowerXOR(Op, DAG);
3341 case ISD::PREFETCH:
3342 return LowerPREFETCH(Op, DAG);
3343 case ISD::SINT_TO_FP:
3344 case ISD::UINT_TO_FP:
3345 case ISD::STRICT_SINT_TO_FP:
3346 case ISD::STRICT_UINT_TO_FP:
3347 return LowerINT_TO_FP(Op, DAG);
3348 case ISD::FP_TO_SINT:
3349 case ISD::FP_TO_UINT:
3350 case ISD::STRICT_FP_TO_SINT:
3351 case ISD::STRICT_FP_TO_UINT:
3352 return LowerFP_TO_INT(Op, DAG);
3353 case ISD::FSINCOS:
3354 return LowerFSINCOS(Op, DAG);
3355 case ISD::FLT_ROUNDS_:
3356 return LowerFLT_ROUNDS_(Op, DAG);
3357 case ISD::MUL:
3358 return LowerMUL(Op, DAG);
3359 case ISD::INTRINSIC_WO_CHAIN:
3360 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
3361 case ISD::STORE:
3362 return LowerSTORE(Op, DAG);
3363 case ISD::VECREDUCE_ADD:
3364 case ISD::VECREDUCE_SMAX:
3365 case ISD::VECREDUCE_SMIN:
3366 case ISD::VECREDUCE_UMAX:
3367 case ISD::VECREDUCE_UMIN:
3368 case ISD::VECREDUCE_FMAX:
3369 case ISD::VECREDUCE_FMIN:
3370 return LowerVECREDUCE(Op, DAG);
3371 case ISD::ATOMIC_LOAD_SUB:
3372 return LowerATOMIC_LOAD_SUB(Op, DAG);
3373 case ISD::ATOMIC_LOAD_AND:
3374 return LowerATOMIC_LOAD_AND(Op, DAG);
3375 case ISD::DYNAMIC_STACKALLOC:
3376 return LowerDYNAMIC_STACKALLOC(Op, DAG);
3377 case ISD::VSCALE:
3378 return LowerVSCALE(Op, DAG);
3379 }
3380}
3381
3382//===----------------------------------------------------------------------===//
3383// Calling Convention Implementation
3384//===----------------------------------------------------------------------===//
3385
3386/// Selects the correct CCAssignFn for a given CallingConvention value.
3387CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
3388 bool IsVarArg) const {
3389 switch (CC) {
3390 default:
3391 report_fatal_error("Unsupported calling convention.");
3392 case CallingConv::WebKit_JS:
3393 return CC_AArch64_WebKit_JS;
3394 case CallingConv::GHC:
3395 return CC_AArch64_GHC;
3396 case CallingConv::C:
3397 case CallingConv::Fast:
3398 case CallingConv::PreserveMost:
3399 case CallingConv::CXX_FAST_TLS:
3400 case CallingConv::Swift:
3401 if (Subtarget->isTargetWindows() && IsVarArg)
3402 return CC_AArch64_Win64_VarArg;
3403 if (!Subtarget->isTargetDarwin())
3404 return CC_AArch64_AAPCS;
3405 if (!IsVarArg)
3406 return CC_AArch64_DarwinPCS;
3407 return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
3408 : CC_AArch64_DarwinPCS_VarArg;
3409 case CallingConv::Win64:
3410 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
3411 case CallingConv::CFGuard_Check:
3412 return CC_AArch64_Win64_CFGuard_Check;
3413 case CallingConv::AArch64_VectorCall:
3414 case CallingConv::AArch64_SVE_VectorCall:
3415 return CC_AArch64_AAPCS;
3416 }
3417}
3418
3419CCAssignFn *
3420AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
3421 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
3422 : RetCC_AArch64_AAPCS;
3423}
3424
3425SDValue AArch64TargetLowering::LowerFormalArguments(
3426 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3427 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3428 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3429 MachineFunction &MF = DAG.getMachineFunction();
3430 MachineFrameInfo &MFI = MF.getFrameInfo();
3431 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3432
3433 // Assign locations to all of the incoming arguments.
3434 SmallVector<CCValAssign, 16> ArgLocs;
3435 DenseMap<unsigned, SDValue> CopiedRegs;
3436 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3437 *DAG.getContext());
3438
3439 // At this point, Ins[].VT may already be promoted to i32. To correctly
3440 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3441 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3442 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
3443 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
3444 // LocVT.
3445 unsigned NumArgs = Ins.size();
3446 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3447 unsigned CurArgIdx = 0;
3448 for (unsigned i = 0; i != NumArgs; ++i) {
3449 MVT ValVT = Ins[i].VT;
3450 if (Ins[i].isOrigArg()) {
3451 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
3452 CurArgIdx = Ins[i].getOrigArgIndex();
3453
3454 // Get type of the original argument.
3455 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
3456 /*AllowUnknown*/ true);
3457 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
3458 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3459 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3460 ValVT = MVT::i8;
3461 else if (ActualMVT == MVT::i16)
3462 ValVT = MVT::i16;
3463 }
3464 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3465 bool Res =
3466 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
3467 assert(!Res && "Call operand has unhandled type");
3468 (void)Res;
3469 }
3470 assert(ArgLocs.size() == Ins.size());
3471 SmallVector<SDValue, 16> ArgValues;
3472 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3473 CCValAssign &VA = ArgLocs[i];
3474
3475 if (Ins[i].Flags.isByVal()) {
3476 // Byval is used for HFAs in the PCS, but the system should work in a
3477 // non-compliant manner for larger structs.
3478 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3479 int Size = Ins[i].Flags.getByValSize();
3480 unsigned NumRegs = (Size + 7) / 8;
3481
3482 // FIXME: This works on big-endian for composite byvals, which are the common
3483 // case. It should also work for fundamental types too.
3484 unsigned FrameIdx =
3485 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
3486 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
3487 InVals.push_back(FrameIdxN);
3488
3489 continue;
3490 }
3491
3492 SDValue ArgValue;
3493 if (VA.isRegLoc()) {
3494 // Arguments stored in registers.
3495 EVT RegVT = VA.getLocVT();
3496 const TargetRegisterClass *RC;
3497
3498 if (RegVT == MVT::i32)
3499 RC = &AArch64::GPR32RegClass;
3500 else if (RegVT == MVT::i64)
3501 RC = &AArch64::GPR64RegClass;
3502 else if (RegVT == MVT::f16)
3503 RC = &AArch64::FPR16RegClass;
3504 else if (RegVT == MVT::f32)
3505 RC = &AArch64::FPR32RegClass;
3506 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
3507 RC = &AArch64::FPR64RegClass;
3508 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
3509 RC = &AArch64::FPR128RegClass;
3510 else if (RegVT.isScalableVector() &&
3511 RegVT.getVectorElementType() == MVT::i1)
3512 RC = &AArch64::PPRRegClass;
3513 else if (RegVT.isScalableVector())
3514 RC = &AArch64::ZPRRegClass;
3515 else
3516 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering"
, "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3516)
;
3517
3518 // Transform the arguments in physical registers into virtual ones.
3519 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3520 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
3521
3522 // If this is an 8, 16 or 32-bit value, it is really passed promoted
3523 // to 64 bits. Insert an assert[sz]ext to capture this, then
3524 // truncate to the right size.
3525 switch (VA.getLocInfo()) {
3526 default:
3527 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-11~++20200309111110+2c36c23f347/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3527)
;
3528 case CCValAssign::Full:
3529 break;
3530 case CCValAssign::Indirect:
3531 assert(VA.getValVT().isScalableVector() &&
3532        "Only scalable vectors can be passed indirectly");
3533 break;
3534 case CCValAssign::BCvt:
3535 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3536 break;
3537 case CCValAssign::AExt:
3538 case CCValAssign::SExt:
3539 case CCValAssign::ZExt:
3540 break;
3541 case CCValAssign::AExtUpper:
3542 ArgValue = DAG.getNode(ISD::SRL, DL, RegVT, ArgValue,
3543 DAG.getConstant(32, DL, RegVT));
3544 ArgValue = DAG.getZExtOrTrunc(ArgValue, DL, VA.getValVT());
3545 break;
3546 }
3547 } else { // VA.isRegLoc()
3548 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3549 unsigned ArgOffset = VA.getLocMemOffset();
3550 unsigned ArgSize = (VA.getLocInfo() == CCValAssign::Indirect
3551 ? VA.getLocVT().getSizeInBits()
3552 : VA.getValVT().getSizeInBits()) / 8;
3553
3554 uint32_t BEAlign = 0;
3555 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3556 !Ins[i].Flags.isInConsecutiveRegs())
3557 BEAlign = 8 - ArgSize;
3558
3559 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3560
3561 // Create load nodes to retrieve arguments from the stack.
3562 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3563
3564 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
3565 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3566 MVT MemVT = VA.getValVT();
3567
3568 switch (VA.getLocInfo()) {
3569 default:
3570 break;
3571 case CCValAssign::Trunc:
3572 case CCValAssign::BCvt:
3573 MemVT = VA.getLocVT();
3574 break;
3575 case CCValAssign::Indirect:
3576 assert(VA.getValVT().isScalableVector() &&
3577        "Only scalable vectors can be passed indirectly");
3578 MemVT = VA.getLocVT();
3579 break;
3580 case CCValAssign::SExt:
3581 ExtType = ISD::SEXTLOAD;
3582 break;
3583 case CCValAssign::ZExt:
3584 ExtType = ISD::ZEXTLOAD;
3585 break;
3586 case CCValAssign::AExt:
3587 ExtType = ISD::EXTLOAD;
3588 break;
3589 }
3590
3591 ArgValue = DAG.getExtLoad(
3592 ExtType, DL, VA.getLocVT(), Chain, FIN,
3593 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3594 MemVT);
3595
3596 }
3597
3598 if (VA.getLocInfo() == CCValAssign::Indirect) {
3599 assert(VA.getValVT().isScalableVector() &&
3600        "Only scalable vectors can be passed indirectly");
3601 // If value is passed via pointer - do a load.
3602 ArgValue =
3603 DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue, MachinePointerInfo());
3604 }
3605
3606 if (Subtarget->isTargetILP32() && Ins[i].Flags.isPointer())
3607 ArgValue = DAG.getNode(ISD::AssertZext, DL, ArgValue.getValueType(),
3608 ArgValue, DAG.getValueType(MVT::i32));
3609 InVals.push_back(ArgValue);
3610 }
3611
3612 // varargs
3613 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3614 if (isVarArg) {
3615 if (!Subtarget->isTargetDarwin() || IsWin64) {
3616 // The AAPCS variadic function ABI is identical to the non-variadic
3617 // one. As a result there may be more arguments in registers and we should
3618 // save them for future reference.
3619 // Win64 variadic functions also pass arguments in registers, but all float
3620 // arguments are passed in integer registers.
3621 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3622 }
3623
3624 // This will point to the next argument passed via stack.
3625 unsigned StackOffset = CCInfo.getNextStackOffset();
3626 // We currently pass all varargs at 8-byte alignment, or 4 for ILP32
3627 StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
3628 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3629
3630 if (MFI.hasMustTailInVarArgFunc()) {
3631 SmallVector<MVT, 2> RegParmTypes;
3632 RegParmTypes.push_back(MVT::i64);
3633 RegParmTypes.push_back(MVT::f128);
3634 // Compute the set of forwarded registers. The rest are scratch.
3635 SmallVectorImpl<ForwardedRegister> &Forwards =
3636 FuncInfo->getForwardedMustTailRegParms();
3637 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes,
3638 CC_AArch64_AAPCS);
3639
3640 // Conservatively forward X8, since it might be used for aggregate return.
3641 if (!CCInfo.isAllocated(AArch64::X8)) {
3642 unsigned X8VReg = MF.addLiveIn(AArch64::X8, &AArch64::GPR64RegClass);
3643 Forwards.push_back(ForwardedRegister(X8VReg, AArch64::X8, MVT::i64));
3644 }
3645 }
3646 }
3647
3648 // On Windows, InReg pointers must be returned, so record the pointer in a
3649 // virtual register at the start of the function so it can be returned in the
3650 // epilogue.
3651 if (IsWin64) {
3652 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3653 if (Ins[I].Flags.isInReg()) {
3654 assert(!FuncInfo->getSRetReturnReg());
3655
3656 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3657 Register Reg =
3658 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3659 FuncInfo->setSRetReturnReg(Reg);
3660
3661 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[I]);
3662 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
3663 break;
3664 }
3665 }
3666 }
3667
3668 unsigned StackArgSize = CCInfo.getNextStackOffset();
3669 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3670 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3671 // This is a non-standard ABI so by fiat I say we're allowed to make full
3672 // use of the stack area to be popped, which must be aligned to 16 bytes in
3673 // any case:
3674 StackArgSize = alignTo(StackArgSize, 16);
3675
3676 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3677 // a multiple of 16.
3678 FuncInfo->setArgumentStackToRestore(StackArgSize);
3679
3680 // This realignment carries over to the available bytes below. Our own
3681 // callers will guarantee the space is free by giving an aligned value to
3682 // CALLSEQ_START.
3683 }
3684 // Even if we're not expected to free up the space, it's useful to know how
3685 // much is there while considering tail calls (because we can reuse it).
3686 FuncInfo->setBytesInStackArgArea(StackArgSize);
3687
3688 if (Subtarget->hasCustomCallingConv())
3689 Subtarget->getRegisterInfo()->UpdateCustomCalleeSavedRegs(MF);
3690
3691 return Chain;
3692}
3693
3694void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3695 SelectionDAG &DAG,
3696 const SDLoc &DL,
3697 SDValue &Chain) const {
3698 MachineFunction &MF = DAG.getMachineFunction();
3699 MachineFrameInfo &MFI = MF.getFrameInfo();
3700 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3701 auto PtrVT = getPointerTy(DAG.getDataLayout());
3702 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3703
3704 SmallVector<SDValue, 8> MemOps;
3705
3706 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3707 AArch64::X3, AArch64::X4, AArch64::X5,
3708 AArch64::X6, AArch64::X7 };
3709 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3710 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3711
3712 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3713 int GPRIdx = 0;
3714 if (GPRSaveSize != 0) {
3715 if (IsWin64) {
3716 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3717 if (GPRSaveSize & 15)
3718 // The extra size here, if triggered, will always be 8.
3719 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3720 } else
3721 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3722
3723 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3724
3725 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3726 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3727 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3728 SDValue Store = DAG.getStore(
3729 Val.getValue(1), DL, Val, FIN,
3730 IsWin64
3731 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3732 GPRIdx,
3733 (i - FirstVariadicGPR) * 8)
3734 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3735 MemOps.push_back(Store);
3736 FIN =
3737 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3738 }
3739 }
3740 FuncInfo->setVarArgsGPRIndex(GPRIdx);
3741 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3742
3743 if (Subtarget->hasFPARMv8() && !IsWin64) {
3744 static const MCPhysReg FPRArgRegs[] = {
3745 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3746 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3747 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3748 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3749
3750 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3751 int FPRIdx = 0;
3752 if (FPRSaveSize != 0) {
3753 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3754
3755 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3756
3757 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3758 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3759 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3760
3761 SDValue Store = DAG.getStore(
3762 Val.getValue(1), DL, Val, FIN,
3763 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3764 MemOps.push_back(Store);
3765 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3766 DAG.getConstant(16, DL, PtrVT));
3767 }
3768 }
3769 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3770 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3771 }
3772
3773 if (!MemOps.empty()) {
3774 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3775 }
3776}
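The index/size pairs recorded above (VarArgsGPRIndex/Size, VarArgsFPRIndex/Size) together with VarArgsStackIndex are what the va_start lowering elsewhere in this file uses to populate the AAPCS64 va_list. A minimal sketch of that structure, paraphrased from the AAPCS64 spec (illustrative field names; Darwin and Win64 instead use a simple pointer-style va_list):

// Illustration only: the register-save areas stored by saveVarArgRegisters
// back the __gr_top/__vr_top regions, and the fixed stack object created for
// varargs backs __stack.
struct AAPCS64VaList {
  void *__stack;  // next stacked (memory) argument
  void *__gr_top; // one past the end of the saved X0-X7 block
  void *__vr_top; // one past the end of the saved Q0-Q7 block
  int __gr_offs;  // negative byte offset from __gr_top to the next GP arg
  int __vr_offs;  // negative byte offset from __vr_top to the next FP/SIMD arg
};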
3777
3778/// LowerCallResult - Lower the result values of a call into the
3779/// appropriate copies out of appropriate physical registers.
3780SDValue AArch64TargetLowering::LowerCallResult(
3781 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3782 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3783 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3784 SDValue ThisVal) const {
3785 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3786 ? RetCC_AArch64_WebKit_JS
3787 : RetCC_AArch64_AAPCS;
3788 // Assign locations to each value returned by this call.
3789 SmallVector<CCValAssign, 16> RVLocs;
3790 DenseMap<unsigned, SDValue> CopiedRegs;
3791 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3792 *DAG.getContext());
3793 CCInfo.AnalyzeCallResult(Ins, RetCC);
3794
3795 // Copy all of the result registers out of their specified physreg.
3796 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3797 CCValAssign VA = RVLocs[i];
3798
3799 // Pass 'this' value directly from the argument to return value, to avoid
3800 // reg unit interference
3801 if (i == 0 && isThisReturn) {
3802 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3803        "unexpected return calling convention register assignment");
3804 InVals.push_back(ThisVal);
3805 continue;
3806 }
3807
3808 // Avoid copying a physreg twice since RegAllocFast is incompetent and only
3809 // allows one use of a physreg per block.
3810 SDValue Val = CopiedRegs.lookup(VA.getLocReg());
3811 if (!Val) {
3812 Val =
3813 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3814 Chain = Val.getValue(1);
3815 InFlag = Val.getValue(2);
3816 CopiedRegs[VA.getLocReg()] = Val;
3817 }
3818
3819 switch (VA.getLocInfo()) {
3820 default:
3821 llvm_unreachable("Unknown loc info!");
3822 case CCValAssign::Full:
3823 break;
3824 case CCValAssign::BCvt:
3825 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3826 break;
3827 case CCValAssign::AExtUpper:
3828 Val = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), Val,
3829 DAG.getConstant(32, DL, VA.getLocVT()));
3830 LLVM_FALLTHROUGH;
3831 case CCValAssign::AExt:
3832 LLVM_FALLTHROUGH;
3833 case CCValAssign::ZExt:
3834 Val = DAG.getZExtOrTrunc(Val, DL, VA.getValVT());
3835 break;
3836 }
3837
3838 InVals.push_back(Val);
3839 }
3840
3841 return Chain;
3842}
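For the AExtUpper case handled above, a minimal standalone sketch of the bit manipulation involved (hypothetical helper names; the SRL-by-32 node plus getZExtOrTrunc correspond to the shift and truncation here):

#include <cstdint>

// Two i32 results can share one 64-bit location; the value assigned to the
// upper half is recovered by a 32-bit logical shift right and then truncated
// back to 32 bits, while the lower half is a plain truncation.
static uint32_t upperResult(uint64_t PackedLoc) {
  return static_cast<uint32_t>(PackedLoc >> 32);
}
static uint32_t lowerResult(uint64_t PackedLoc) {
  return static_cast<uint32_t>(PackedLoc);
}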
3843
3844/// Return true if the calling convention is one that we can guarantee TCO for.
3845static bool canGuaranteeTCO(CallingConv::ID CC) {
3846 return CC == CallingConv::Fast;
3847}
3848
3849/// Return true if we might ever do TCO for calls with this calling convention.
3850static bool mayTailCallThisCC(CallingConv::ID CC) {
3851 switch (CC) {
3852 case CallingConv::C:
3853 case CallingConv::PreserveMost:
3854 case CallingConv::Swift:
3855 return true;
3856 default:
3857 return canGuaranteeTCO(CC);
3858 }
3859}
3860
3861bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3862 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3863 const SmallVectorImpl<ISD::OutputArg> &Outs,
3864 const SmallVectorImpl<SDValue> &OutVals,
3865 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3866 if (!mayTailCallThisCC(CalleeCC))
3867 return false;
3868
3869 MachineFunction &MF = DAG.getMachineFunction();
3870 const Function &CallerF = MF.getFunction();
3871 CallingConv::ID CallerCC = CallerF.getCallingConv();
3872 bool CCMatch = CallerCC == CalleeCC;
3873
3874 // Byval parameters hand the function a pointer directly into the stack area
3875 // we want to reuse during a tail call. Working around this *is* possible (see
3876 // X86) but less efficient and uglier in LowerCall.
3877 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3878 e = CallerF.arg_end();
3879 i != e; ++i) {
3880 if (i->hasByValAttr())
3881 return false;
3882
3883 // On Windows, "inreg" attributes signify non-aggregate indirect returns.
3884 // In this case, it is necessary to save/restore X0 in the callee. Tail
3885 // call opt interferes with this. So we disable tail call opt when the
3886 // caller has an argument with the "inreg" attribute.
3887
3888 // FIXME: Check whether the callee also has an "inreg" argument.
3889 if (i->hasInRegAttr())
3890 return false;
3891 }
3892
3893 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3894 return canGuaranteeTCO(CalleeCC) && CCMatch;
3895
3896 // Externally-defined functions with weak linkage should not be
3897 // tail-called on AArch64 when the OS does not support dynamic
3898 // pre-emption of symbols, as the AAELF spec requires normal calls
3899 // to undefined weak functions to be replaced with a NOP or jump to the
3900 // next instruction. The behaviour of branch instructions in this
3901 // situation (as used for tail calls) is implementation-defined, so we
3902 // cannot rely on the linker replacing the tail call with a return.
3903 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3904 const GlobalValue *GV = G->getGlobal();
3905 const Triple &TT = getTargetMachine().getTargetTriple();
3906 if (GV->hasExternalWeakLinkage() &&
3907 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3908 return false;
3909 }
3910
3911 // Now we search for cases where we can use a tail call without changing the
3912 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3913 // concept.
3914
3915 // I want anyone implementing a new calling convention to think long and hard
3916 // about this assert.
3917 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3918        "Unexpected variadic calling convention");
3919
3920 LLVMContext &C = *DAG.getContext();
3921 if (isVarArg && !Outs.empty()) {
3922 // At least two cases here: if caller is fastcc then we can't have any
3923 // memory arguments (we'd be expected to clean up the stack afterwards). If
3924 // caller is C then we could potentially use its argument area.
3925
3926 // FIXME: for now we take the most conservative of these in both cases:
3927 // disallow all variadic memory operands.
3928 SmallVector<CCValAssign, 16> ArgLocs;
3929 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3930
3931 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3932 for (const CCValAssign &ArgLoc : ArgLocs)
3933 if (!ArgLoc.isRegLoc())
3934 return false;
3935 }
3936
3937 // Check that the call results are passed in the same way.
3938 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3939 CCAssignFnForCall(CalleeCC, isVarArg),
3940 CCAssignFnForCall(CallerCC, isVarArg)))
3941 return false;
3942 // The callee has to preserve all registers the caller needs to preserve.
3943 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3944 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3945 if (!CCMatch) {
3946 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3947 if (Subtarget->hasCustomCallingConv()) {
3948 TRI->UpdateCustomCallPreservedMask(MF, &CallerPreserved);
3949 TRI->UpdateCustomCallPreservedMask(MF, &CalleePreserved);
3950 }
3951 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3952 return false;
3953 }
3954
3955 // Nothing more to check if the callee is taking no arguments
3956 if (Outs.empty())
3957 return true;
3958
3959 SmallVector<CCValAssign, 16> ArgLocs;
3960 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3961
3962 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3963
3964 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3965
3966 // If any of the arguments is passed indirectly, it must be SVE, so the
3967 // 'getBytesInStackArgArea' is not sufficient to determine whether we need to
3968 // allocate space on the stack. That is why we explicitly determine here
3969 // that the call cannot be a tailcall.
3970 if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
3971 assert((A.getLocInfo() != CCValAssign::Indirect ||
3972         A.getValVT().isScalableVector()) &&
3973        "Expected value to be scalable");
3974 return A.getLocInfo() == CCValAssign::Indirect;
3975 }))
3976 return false;
3977
3978 // If the stack arguments for this call do not fit into our own save area then
3979 // the call cannot be made tail.
3980 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3981 return false;
3982
3983 const MachineRegisterInfo &MRI = MF.getRegInfo();
3984 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3985 return false;
3986
3987 return true;
3988}
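A compact restatement of the early rejects performed by isEligibleForTailCallOptimization (an illustrative condensation, not code from this file; the boolean parameters stand in for the checks the real function performs on the caller's arguments and calling conventions):

// Sketch: the caller may not take byval or "inreg" arguments, and under
// GuaranteedTailCallOpt both sides must use a TCO-guaranteeing convention
// that also matches. The remaining ABI-compatibility, register-mask and
// stack-size checks follow in the real function.
static bool roughlyMayTailCall(bool CallerHasByVal, bool CallerHasInReg,
                               bool GuaranteedTCO, bool CCGuaranteesTCO,
                               bool CCMatch) {
  if (CallerHasByVal || CallerHasInReg)
    return false;
  if (GuaranteedTCO)
    return CCGuaranteesTCO && CCMatch;
  return true;
}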
3989
3990SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3991 SelectionDAG &DAG,
3992 MachineFrameInfo &MFI,
3993 int ClobberedFI) const {
3994 SmallVector<SDValue, 8> ArgChains;
3995 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3996 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3997
3998 // Include the original chain at the beginning of the list. When this is
3999 // used by target LowerCall hooks, this helps legalize find the
4000 // CALLSEQ_BEGIN node.
4001 ArgChains.push_back(Chain);
4002
4003 // Add a chain value for each stack argument corresponding to this call.
4004 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
4005 UE = DAG.getEntryNode().getNode()->use_end();
4006 U != UE; ++U)
4007 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
4008 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
4009 if (FI->getIndex() < 0) {
4010 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
4011 int64_t InLastByte = InFirstByte;
4012 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
4013
4014 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
4015 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
4016 ArgChains.push_back(SDValue(L, 1));
4017 }
4018
4019 // Build a tokenfactor for all the chains.
4020 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
4021}
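The loop above keeps a load's chain only if its byte range intersects the clobbered fixed object's [FirstByte, LastByte]. The same closed-interval overlap test, restated as a standalone helper (illustration only, logically equivalent to the condition in the loop):

#include <cstdint>

// True when the closed byte ranges [FirstA, LastA] and [FirstB, LastB]
// share at least one byte.
static bool bytesOverlap(int64_t FirstA, int64_t LastA,
                         int64_t FirstB, int64_t LastB) {
  return (FirstA <= FirstB && FirstB <= LastA) ||
         (FirstB <= FirstA && FirstA <= LastB);
}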
4022
4023bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
4024 bool TailCallOpt) const {
4025 return CallCC == CallingConv::Fast && TailCallOpt;
4026}
4027
4028/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
4029/// and add input and output parameter nodes.
4030SDValue
4031AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
4032 SmallVectorImpl<SDValue> &InVals) const {
4033 SelectionDAG &DAG = CLI.DAG;
4034 SDLoc &DL = CLI.DL;
4035 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
4036 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
4037 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
4038 SDValue Chain = CLI.Chain;
4039 SDValue Callee = CLI.Callee;
4040 bool &IsTailCall = CLI.IsTailCall;
4041 CallingConv::ID CallConv = CLI.CallConv;
4042 bool IsVarArg = CLI.IsVarArg;
4043
4044 MachineFunction &MF = DAG.getMachineFunction();
4045 MachineFunction::CallSiteInfo CSInfo;
4046 bool IsThisReturn = false;
4047
4048 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4049 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
4050 bool IsSibCall = false;
4051
4052 if (IsTailCall) {
4053 // Check if it's really possible to do a tail call.
4054 IsTailCall = isEligibleForTailCallOptimization(
4055 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
4056 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
4057 report_fatal_error("failed to perform tail call elimination on a call "
4058 "site marked musttail");
4059
4060 // A sibling call is one where we're under the usual C ABI and not planning
4061 // to change that but can still do a tail call:
4062 if (!TailCallOpt && IsTailCall)
4063 IsSibCall = true;
4064
4065 if (IsTailCall)
4066 ++NumTailCalls;
4067 }
4068
4069 // Analyze operands of the call, assigning locations to each operand.
4070 SmallVector<CCValAssign, 16> ArgLocs;
4071 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
4072 *DAG.getContext());
4073
4074 if (IsVarArg) {
4075 // Handle fixed and variable vector arguments differently.
4076 // Variable vector arguments always go into memory.
4077 unsigned NumArgs = Outs.size();
4078
4079 for (unsigned i = 0; i != NumArgs; ++i) {
4080 MVT ArgVT = Outs[i].VT;
4081 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4082 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
4083 /*IsVarArg=*/ !Outs[i].IsFixed);
4084 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
4085 assert(!Res && "Call operand has unhandled type");
4086 (void)Res;
4087 }
4088 } else {
4089 // At this point, Outs[].VT may already be promoted to i32. To correctly
4090 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
4091 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
4092 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
4093 // we use a special version of AnalyzeCallOperands to pass in ValVT and
4094 // LocVT.
4095 unsigned NumArgs = Outs.size();
4096 for (unsigned i = 0; i != NumArgs; ++i) {
4097 MVT ValVT = Outs[i].VT;
4098 // Get type of the original argument.
4099 EVT ActualVT = getValueType(DAG.getDataLayout(),
4100 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
4101 /*AllowUnknown*/ true);
4102 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
4103 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
4104 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
4105 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
4106 ValVT = MVT::i8;
4107 else if (ActualMVT == MVT::i16)
4108 ValVT = MVT::i16;
4109
4110 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
4111 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
4112 assert(!Res && "Call operand has unhandled type");
4113 (void)Res;
4114 }
4115 }
4116
4117 // Get a count of how many bytes are to be pushed on the stack.
4118 unsigned NumBytes = CCInfo.getNextStackOffset();
4119
4120 if (IsSibCall) {
4121 // Since we're not changing the ABI to make this a tail call, the memory
4122 // operands are already available in the caller's incoming argument space.
4123 NumBytes = 0;
4124 }
4125
4126 // FPDiff is the byte offset of the call's argument area from the callee's.
4127 // Stores to callee stack arguments will be placed in FixedStackSlots offset
4128 // by this amount for a tail call. In a sibling call it must be 0 because the
4129 // caller will deallocate the entire stack and the callee still expects its
4130 // arguments to begin at SP+0. Completely unused for non-tail calls.
4131 int FPDiff = 0;
4132
4133 if (IsTailCall && !IsSibCall) {
4134 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
4135
4136 // Since callee will pop argument stack as a tail call, we must keep the
4137 // popped size 16-byte aligned.
4138 NumBytes = alignTo(NumBytes, 16);
4139
4140 // FPDiff will be negative if this tail call requires more space than we
4141 // would automatically have in our incoming argument space. Positive if we
4142 // can actually shrink the stack.
4143 FPDiff = NumReusableBytes - NumBytes;
4144
4145 // The stack pointer must be 16-byte aligned at all times it's used for a
4146 // memory operation, which in practice means at *all* times and in
4147 // particular across call boundaries. Therefore our own arguments started at
4148 // a 16-byte aligned SP and the delta applied for the tail call should
4149 // satisfy the same constraint.
4150 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
4151 }
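// Worked example of the FPDiff computation above (illustrative numbers only):
// if the caller's own incoming stack-argument area holds 32 bytes
// (NumReusableBytes == 32) and this tail call needs 44 bytes of stack
// arguments, NumBytes is aligned up to 48 and FPDiff = 32 - 48 = -16, so the
// fixed stack objects created for the callee's arguments further down are
// offset by -16 relative to the start of the caller's argument area.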
4152
4153 // Adjust the stack pointer for the new arguments...
4154 // These operations are automatically eliminated by the prolog/epilog pass
4155 if (!IsSibCall)
4156 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
4157
4158 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
4159 getPointerTy(DAG.getDataLayout()));
4160
4161 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
4162 SmallSet<unsigned, 8> RegsUsed;
4163 SmallVector<SDValue, 8> MemOpChains;
4164 auto PtrVT = getPointerTy(DAG.getDataLayout());
4165
4166 if (IsVarArg && CLI.CS && CLI.CS.isMustTailCall()) {
4167 const auto &Forwards = FuncInfo->getForwardedMustTailRegParms();
4168 for (const auto &F : Forwards) {
4169 SDValue Val = DAG.getCopyFromReg(Chain, DL, F.VReg, F.VT);
4170 RegsToPass.emplace_back(F.PReg, Val);
4171 }
4172 }
4173
4174 // Walk the register/memloc assignments, inserting copies/loads.
4175 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4176 CCValAssign &VA = ArgLocs[i];
4177 SDValue Arg = OutVals[i];
4178 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4179
4180 // Promote the value if needed.
4181 switch (VA.getLocInfo()) {
4182 default:
4183 llvm_unreachable("Unknown loc info!");
4184 case CCValAssign::Full:
4185 break;
4186 case CCValAssign::SExt:
4187 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
4188 break;
4189 case CCValAssign::ZExt:
4190 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4191 break;
4192 case CCValAssign::AExt:
4193 if (Outs[i].ArgVT == MVT::i1) {
4194 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
4195 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4196 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
4197 }
4198 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4199 break;
4200 case CCValAssign::AExtUpper:
4201 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4202 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
4203 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4204 DAG.getConstant(32, DL, VA.getLocVT()));
4205 break;
4206 case CCValAssign::BCvt:
4207 Arg = DAG.getBitcast(VA.getLocVT(), Arg);
4208 break;
4209 case CCValAssign::Trunc:
4210 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4211 break;
4212 case CCValAssign::FPExt:
4213 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
4214 break;
4215 case CCValAssign::Indirect:
4216 assert(VA.getValVT().isScalableVector() &&
4217        "Only scalable vectors can be passed indirectly");
4218 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4219 Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
4220 unsigned Align = DAG.getDataLayout().getPrefTypeAlignment(Ty);
4221 int FI = MFI.CreateStackObject(
4222 VA.getValVT().getStoreSize().getKnownMinSize(), Align, false);
4223 MFI.setStackID(FI, TargetStackID::SVEVector);
4224
4225 SDValue SpillSlot = DAG.getFrameIndex(
4226 FI, DAG.getTargetLoweringInfo().getFrameIndexTy(DAG.getDataLayout()));
4227 Chain = DAG.getStore(
4228 Chain, DL, Arg, SpillSlot,
4229 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4230 Arg = SpillSlot;
4231 break;
4232 }
4233
4234 if (VA.isRegLoc()) {
4235 if (i == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
4236 Outs[0].VT == MVT::i64) {
4237 assert(VA.getLocVT() == MVT::i64 &&
4238        "unexpected calling convention register assignment");
4239 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
4240        "unexpected use of 'returned'");
4241 IsThisReturn = true;
4242 }
4243 if (RegsUsed.count(VA.getLocReg())) {
4244 // If this register has already been used then we're trying to pack
4245 // parts of an [N x i32] into an X-register. The extension type will
4246 // take care of putting the two halves in the right place but we have to
4247 // combine them.
4248 SDValue &Bits =
4249 std::find_if(RegsToPass.begin(), RegsToPass.end(),
4250 [=](const std::pair<unsigned, SDValue> &Elt) {
4251 return Elt.first == VA.getLocReg();
4252 })
4253 ->second;
4254 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4255 // Call site info is used for function's parameter entry value
4256 // tracking. For now we track only simple cases when parameter
4257 // is transferred through whole register.
4258 CSInfo.erase(std::remove_if(CSInfo.begin(), CSInfo.end(),
4259 [&VA](MachineFunction::ArgRegPair ArgReg) {
4260 return ArgReg.Reg == VA.getLocReg();
4261 }),
4262 CSInfo.end());
4263 } else {
4264 RegsToPass.emplace_back(VA.getLocReg(), Arg);
4265 RegsUsed.insert(VA.getLocReg());
4266 const TargetOptions &Options = DAG.getTarget().Options;
4267 if (Options.EnableDebugEntryValues)
4268 CSInfo.emplace_back(VA.getLocReg(), i);
4269 }
4270 } else {
4271 assert(VA.isMemLoc());
4272
4273 SDValue DstAddr;
4274 MachinePointerInfo DstInfo;
4275
4276 // FIXME: This works on big-endian for composite byvals, which are the
4277 // common case. It should also work for fundamental types too.
4278 uint32_t BEAlign = 0;
4279 unsigned OpSize;
4280 if (VA.getLocInfo() == CCValAssign::Indirect)
4281 OpSize = VA.getLocVT().getSizeInBits();
4282 else
4283 OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
4284 : VA.getValVT().getSizeInBits();
4285 OpSize = (OpSize + 7) / 8;
4286 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
4287 !Flags.isInConsecutiveRegs()) {
4288 if (OpSize < 8)
4289 BEAlign = 8 - OpSize;
4290 }
4291 unsigned LocMemOffset = VA.getLocMemOffset();
4292 int32_t Offset = LocMemOffset + BEAlign;
4293 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4294 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4295
4296 if (IsTailCall) {
4297 Offset = Offset + FPDiff;
4298 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4299
4300 DstAddr = DAG.getFrameIndex(FI, PtrVT);
4301 DstInfo =
4302 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
4303
4304 // Make sure any stack arguments overlapping with where we're storing
4305 // are loaded before this eventual operation. Otherwise they'll be
4306 // clobbered.
4307 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
4308 } else {
4309 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
4310
4311 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
4312 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
4313 LocMemOffset);
4314 }
4315
4316 if (Outs[i].Flags.isByVal()) {
4317 SDValue SizeNode =
4318 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
4319 SDValue Cpy = DAG.getMemcpy(
4320 Chain, DL, DstAddr, Arg, SizeNode,
4321 Outs[i].Flags.getNonZeroByValAlign(),
4322 /*isVol = */ false, /*AlwaysInline = */ false,
4323 /*isTailCall = */ false, DstInfo, MachinePointerInfo());
4324
4325 MemOpChains.push_back(Cpy);
4326 } else {
4327 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
4328 // promoted to a legal register type i32, we should truncate Arg back to
4329 // i1/i8/i16.
4330 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
4331 VA.getValVT() == MVT::i16)
4332 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
4333
4334 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
4335 MemOpChains.push_back(Store);
4336 }
4337 }
4338 }
4339
4340 if (!MemOpChains.empty())
4341 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
4342
4343 // Build a sequence of copy-to-reg nodes chained together with token chain
4344 // and flag operands which copy the outgoing args into the appropriate regs.
4345 SDValue InFlag;
4346 for (auto &RegToPass : RegsToPass) {
4347 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
4348 RegToPass.second, InFlag);
4349 InFlag = Chain.getValue(1);
4350 }
4351
4352 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
4353 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
4354 // node so that legalize doesn't hack it.
4355 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
4356 auto GV = G->getGlobal();
4357 unsigned OpFlags =
4358 Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine());
4359 if (OpFlags & AArch64II::MO_GOT) {
4360 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
4361 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4362 } else {
4363 const GlobalValue *GV = G->getGlobal();
4364 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
4365 }
4366 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
4367 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4368 Subtarget->isTargetMachO()) {
4369 const char *Sym = S->getSymbol();
4370 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
4371 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
4372 } else {
4373 const char *Sym = S->getSymbol();
4374 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
4375 }
4376 }
4377
4378 // We don't usually want to end the call-sequence here because we would tidy
4379 // the frame up *after* the call. However, in the ABI-changing tail-call case
4380 // we've carefully laid out the parameters so that when sp is reset they'll be
4381 // in the correct location.
4382 if (IsTailCall && !IsSibCall) {
4383 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4384 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
4385 InFlag = Chain.getValue(1);
4386 }
4387
4388 std::vector<SDValue> Ops;
4389 Ops.push_back(Chain);
4390 Ops.push_back(Callee);
4391
4392 if (IsTailCall) {
4393 // Each tail call may have to adjust the stack by a different amount, so
4394 // this information must travel along with the operation for eventual
4395 // consumption by emitEpilogue.
4396 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
4397 }
4398
4399 // Add argument registers to the end of the list so that they are known live
4400 // into the call.
4401 for (auto &RegToPass : RegsToPass)
4402 Ops.push_back(DAG.getRegister(RegToPass.first,
4403 RegToPass.second.getValueType()));
4404
4405 // Check callee args/returns for SVE registers and set calling convention
4406 // accordingly.
4407 if (CallConv == CallingConv::C) {
4408 bool CalleeOutSVE = any_of(Outs, [](ISD::OutputArg &Out){
4409 return Out.VT.isScalableVector();
4410 });
4411 bool CalleeInSVE = any_of(Ins, [](ISD::InputArg &In){
4412 return In.VT.isScalableVector();
4413 });
4414
4415 if (CalleeInSVE || CalleeOutSVE)
4416 CallConv = CallingConv::AArch64_SVE_VectorCall;
4417 }
4418
4419 // Add a register mask operand representing the call-preserved registers.
4420 const uint32_t *Mask;
4421 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4422 if (IsThisReturn) {
4423 // For 'this' returns, use the X0-preserving mask if applicable
4424 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
4425 if (!Mask) {
4426 IsThisReturn = false;
4427 Mask = TRI->getCallPreservedMask(MF, CallConv);
4428 }
4429 } else
4430 Mask = TRI->getCallPreservedMask(MF, CallConv);
4431
4432 if (Subtarget->hasCustomCallingConv())
4433 TRI->UpdateCustomCallPreservedMask(MF, &Mask);
4434
4435 if (TRI->isAnyArgRegReserved(MF))
4436 TRI->emitReservedArgRegCallError(MF);
4437
4438 assert(Mask && "Missing call preserved mask for calling convention");
4439 Ops.push_back(DAG.getRegisterMask(Mask));
4440
4441 if (InFlag.getNode())
4442 Ops.push_back(InFlag);
4443
4444 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4445
4446 // If we're doing a tail call, use a TC_RETURN here rather than an
4447 // actual call instruction.
4448 if (IsTailCall) {
4449 MF.getFrameInfo().setHasTailCall();
4450 SDValue Ret = DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
4451 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4452 return Ret;
4453 }
4454
4455 // Returns a chain and a flag for retval copy to use.
4456 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
4457 InFlag = Chain.getValue(1);
4458 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4459
4460 uint64_t CalleePopBytes =
4461 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
4462
4463 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
4464 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
4465 InFlag, DL);
4466 if (!Ins.empty())
4467 InFlag = Chain.getValue(1);
4468
4469 // Handle result values, copying them out of physregs into vregs that we
4470 // return.
4471 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
4472 InVals, IsThisReturn,
4473 IsThisReturn ? OutVals[0] : SDValue());
4474}
4475
4476bool AArch64TargetLowering::CanLowerReturn(
4477 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
4478 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
4479 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4480 ? RetCC_AArch64_WebKit_JS
4481 : RetCC_AArch64_AAPCS;
4482 SmallVector<CCValAssign, 16> RVLocs;
4483 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
4484 return CCInfo.CheckReturn(Outs, RetCC);
4485}
4486
4487SDValue
4488AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
4489 bool isVarArg,
4490 const SmallVectorImpl<ISD::OutputArg> &Outs,
4491 const SmallVectorImpl<SDValue> &OutVals,
4492 const SDLoc &DL, SelectionDAG &DAG) const {
4493 auto &MF = DAG.getMachineFunction();
4494 auto *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4495
4496 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
4497 ? RetCC_AArch64_WebKit_JS
4498 : RetCC_AArch64_AAPCS;
4499 SmallVector<CCValAssign, 16> RVLocs;
4500 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
4501 *DAG.getContext());
4502 CCInfo.AnalyzeReturn(Outs, RetCC);
4503
4504 // Copy the result values into the output registers.
4505 SDValue Flag;
4506 SmallVector<std::pair<unsigned, SDValue>, 4> RetVals;
4507 SmallSet<unsigned, 4> RegsUsed;
4508 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
4509 ++i, ++realRVLocIdx) {
4510 CCValAssign &VA = RVLocs[i];
4511 assert(VA.isRegLoc() && "Can only return in registers!");
4512 SDValue Arg = OutVals[realRVLocIdx];
4513
4514 switch (VA.getLocInfo()) {
4515 default:
4516 llvm_unreachable("Unknown loc info!");
4517 case CCValAssign::Full:
4518 if (Outs[i].ArgVT == MVT::i1) {
4519 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
4520 // value. This is strictly redundant on Darwin (which uses "zeroext
4521 // i1"), but will be optimised out before ISel.
4522 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
4523 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
4524 }
4525 break;
4526 case CCValAssign::BCvt:
4527 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
4528 break;
4529 case CCValAssign::AExt:
4530 case CCValAssign::ZExt:
4531 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4532 break;
4533 case CCValAssign::AExtUpper:
4534 assert(VA.getValVT() == MVT::i32 && "only expect 32 -> 64 upper bits");
4535 Arg = DAG.getZExtOrTrunc(Arg, DL, VA.getLocVT());
4536 Arg = DAG.getNode(ISD::SHL, DL, VA.getLocVT(), Arg,
4537 DAG.getConstant(32, DL, VA.getLocVT()));
4538 break;
4539 }
4540
4541 if (RegsUsed.count(VA.getLocReg())) {
4542 SDValue &Bits =
4543 std::find_if(RetVals.begin(), RetVals.end(),
4544 [=](const std::pair<unsigned, SDValue> &Elt) {
4545 return Elt.first == VA.getLocReg();
4546 })
4547 ->second;
4548 Bits = DAG.getNode(ISD::OR, DL, Bits.getValueType(), Bits, Arg);
4549 } else {
4550 RetVals.emplace_back(VA.getLocReg(), Arg);
4551 RegsUsed.insert(VA.getLocReg());
4552 }
4553 }
4554
4555 SmallVector<SDValue, 4> RetOps(1, Chain);
4556 for (auto &RetVal : RetVals) {
4557 Chain = DAG.getCopyToReg(Chain, DL, RetVal.first, RetVal.second, Flag);
4558 Flag = Chain.getValue(1);
4559 RetOps.push_back(
4560 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
4561 }
4562
4563 // Windows AArch64 ABIs require that for returning structs by value we copy
4564 // the sret argument into X0 for the return.
4565 // We saved the argument into a virtual register in the entry block,
4566 // so now we copy the value out and into X0.
4567 if (unsigned SRetReg = FuncInfo->getSRetReturnReg()) {
4568 SDValue Val = DAG.getCopyFromReg(RetOps[0], DL, SRetReg,
4569 getPointerTy(MF.getDataLayout()));
4570
4571 unsigned RetValReg = AArch64::X0;
4572 Chain = DAG.getCopyToReg(Chain, DL, RetValReg, Val, Flag);
4573 Flag = Chain.getValue(1);
4574
4575 RetOps.push_back(
4576 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
4577 }
4578
4579 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4580 const MCPhysReg *I =
4581 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
4582 if (I) {
4583 for (; *I; ++I) {
4584 if (AArch64::GPR64RegClass.contains(*I))
4585 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
4586 else if (AArch64::FPR64RegClass.contains(*I))
4587 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
4588 else
4589 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
4590 }
4591 }
4592
4593 RetOps[0] = Chain; // Update chain.
4594
4595 // Add the flag if we have it.
4596 if (Flag.getNode())
4597 RetOps.push_back(Flag);
4598
4599 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
4600}
4601
4602//===----------------------------------------------------------------------===//
4603// Other Lowering Code
4604//===----------------------------------------------------------------------===//
4605
4606SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
4607 SelectionDAG &DAG,
4608 unsigned Flag) const {
4609 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
4610 N->getOffset(), Flag);
4611}
4612
4613SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
4614 SelectionDAG &DAG,
4615 unsigned Flag) const {
4616 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
4617}
4618
4619SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
4620 SelectionDAG &DAG,
4621 unsigned Flag) const {
4622 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
4623 N->getOffset(), Flag);
4624}
4625
4626SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
4627 SelectionDAG &DAG,
4628 unsigned Flag) const {
4629 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
4630}
4631
4632// (loadGOT sym)
4633template <class NodeTy>
4634SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
4635 unsigned Flags) const {
4636 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
4637 SDLoc DL(N);
4638 EVT Ty = getPointerTy(DAG.getDataLayout());
4639 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
4640 // FIXME: Once remat is capable of dealing with instructions with register
4641 // operands, expand this into two nodes instead of using a wrapper node.
4642 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
4643}
4644
4645// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
4646template <class NodeTy>
4647SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
4648 unsigned Flags) const {
4649 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
4650 SDLoc DL(N);
4651 EVT Ty = getPointerTy(DAG.getDataLayout());
4652 const unsigned char MO_NC = AArch64II::MO_NC;
4653 return DAG.getNode(
4654 AArch64ISD::WrapperLarge, DL, Ty,
4655 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
4656 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
4657 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
4658 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
4659}
4660
4661// (addlow (adrp %hi(sym)) %lo(sym))
4662template <class NodeTy>
4663SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
4664 unsigned Flags) const {
4665 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
4666 SDLoc DL(N);
4667 EVT Ty = getPointerTy(DAG.getDataLayout());
4668 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
4669 SDValue Lo = getTargetNode(N, Ty, DAG,
4670 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
4671 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
4672 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
4673}
4674
4675// (adr sym)
4676template <class NodeTy>
4677SDValue AArch64TargetLowering::getAddrTiny(NodeTy *N, SelectionDAG &DAG,
4678 unsigned Flags) const {
4679 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrTiny\n");
4680 SDLoc DL(N);
4681 EVT Ty = getPointerTy(DAG.getDataLayout());
4682 SDValue Sym = getTargetNode(N, Ty, DAG, Flags);
4683 return DAG.getNode(AArch64ISD::ADR, DL, Ty, Sym);
4684}
4685
4686SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
4687 SelectionDAG &DAG) const {
4688 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
4689 const GlobalValue *GV = GN->getGlobal();
4690 unsigned OpFlags = Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
4691
4692 if (OpFlags != AArch64II::MO_NO_FLAG)
4693 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
4694        "unexpected offset in global node");
4695
4696 // This also catches the large code model case for Darwin, and tiny code
4697 // model with got relocations.
4698 if ((OpFlags & AArch64II::MO_GOT) != 0) {
4699 return getGOT(GN, DAG, OpFlags);
4700 }
4701
4702 SDValue Result;
4703 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4704 Result = getAddrLarge(GN, DAG, OpFlags);
4705 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
4706 Result = getAddrTiny(GN, DAG, OpFlags);
4707 } else {
4708 Result = getAddr(GN, DAG, OpFlags);
4709 }
4710 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4711 SDLoc DL(GN);
4712 if (OpFlags & (AArch64II::MO_DLLIMPORT | AArch64II::MO_COFFSTUB))
4713 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
4714 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
4715 return Result;
4716}
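A hypothetical example (Windows/COFF target assumed; not code from this file) of a reference that takes the MO_DLLIMPORT path and therefore needs the extra load emitted just above: the symbol's address lives in an import-table slot, so the address sequence yields the slot's address and the load fetches the variable's real address.

// clang targeting aarch64-windows would access ImportedCounter through its
// __imp_ pointer; the extra DAG.getLoad above models that indirection.
__declspec(dllimport) extern int ImportedCounter;
int readImported() { return ImportedCounter; }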
4717
4718/// Convert a TLS address reference into the correct sequence of loads
4719/// and calls to compute the variable's address (for Darwin, currently) and
4720/// return an SDValue containing the final node.
4721
4722/// Darwin only has one TLS scheme which must be capable of dealing with the
4723/// fully general situation, in the worst case. This means:
4724/// + "extern __thread" declaration.
4725/// + Defined in a possibly unknown dynamic library.
4726///
4727/// The general system is that each __thread variable has a [3 x i64] descriptor
4728/// which contains information used by the runtime to calculate the address. The
4729/// only part of this the compiler needs to know about is the first xword, which
4730/// contains a function pointer that must be called with the address of the
4731/// entire descriptor in "x0".
4732///
4733/// Since this descriptor may be in a different unit, in general even the
4734/// descriptor must be accessed via an indirect load. The "ideal" code sequence
4735/// is:
4736/// adrp x0, _var@TLVPPAGE
4737/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
4738/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
4739/// ; the function pointer
4740/// blr x1 ; Uses descriptor address in x0
4741/// ; Address of _var is now in x0.
4742///
4743/// If the address of _var's descriptor *is* known to the linker, then it can
4744/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
4745/// a slight efficiency gain.
4746SDValue
4747AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
4748 SelectionDAG &DAG) const {
4749 assert(Subtarget->isTargetDarwin() &&
4750        "This function expects a Darwin target");
4751
4752 SDLoc DL(Op);
4753 MVT PtrVT = getPointerTy(DAG.getDataLayout());
4754 MVT PtrMemVT = getPointerMemTy(DAG.getDataLayout());
4755 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
4756
4757 SDValue TLVPAddr =
4758 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4759 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
4760
4761 // The first entry in the descriptor is a function pointer that we must call
4762 // to obtain the address of the variable.
4763 SDValue Chain = DAG.getEntryNode();
4764 SDValue FuncTLVGet = DAG.getLoad(
4765 PtrMemVT, DL, Chain, DescAddr,
4766 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
4767 /* Alignment = */ PtrMemVT.getSizeInBits() / 8,
4768 MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
4769 Chain = FuncTLVGet.getValue(1);
4770
4771 // Extend loaded pointer if necessary (i.e. if ILP32) to DAG pointer.
4772 FuncTLVGet = DAG.getZExtOrTrunc(FuncTLVGet, DL, PtrVT);
4773
4774 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4775 MFI.setAdjustsStack(true);
4776
4777 // TLS calls preserve all registers except those that absolutely must be
4778 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4779 // silly).
4780 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
4781 const uint32_t *Mask = TRI->getTLSCallPreservedMask();
4782 if (Subtarget->hasCustomCallingConv())
4783 TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
4784
4785 // Finally, we can make the call. This is just a degenerate version of a
4786 // normal AArch64 call node: x0 takes the address of the descriptor, and
4787 // returns the address of the variable in this thread.
4788 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4789 Chain =
4790 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4791 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4792 DAG.getRegisterMask(Mask), Chain.getValue(1));
4793 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4794}
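
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] A minimal model of
// the Darwin TLV descriptor that the lowering above targets, to make the
// "first xword is a function pointer called with x0 = &descriptor" convention
// concrete. Field names are illustrative assumptions, not dyld's declarations.
struct TLVDescriptorSketch {
  void *(*Thunk)(TLVDescriptorSketch *); // 1st xword: resolver, called with the
                                         // descriptor's own address in x0
  unsigned long Opaque1;                 // remaining two xwords belong to the
  unsigned long Opaque2;                 // runtime; the compiler ignores them
};

// Mirrors the lowered call: load the first entry and call it with the
// descriptor's address; the variable's address comes back (in x0 in the real
// instruction sequence).
static inline void *ResolveDarwinTLV(TLVDescriptorSketch *Desc) {
  return Desc->Thunk(Desc);
}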
4795
4796/// Convert a thread-local variable reference into a sequence of instructions to
4797/// compute the variable's address for the local exec TLS model of ELF targets.
4798/// The sequence depends on the maximum TLS area size.
4799SDValue AArch64TargetLowering::LowerELFTLSLocalExec(const GlobalValue *GV,
4800 SDValue ThreadBase,
4801 const SDLoc &DL,
4802 SelectionDAG &DAG) const {
4803 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4804 SDValue TPOff, Addr;
4805
4806 switch (DAG.getTarget().Options.TLSSize) {
4807 default:
4808    llvm_unreachable("Unexpected TLS size");
4809
4810 case 12: {
4811 // mrs x0, TPIDR_EL0
4812 // add x0, x0, :tprel_lo12:a
4813 SDValue Var = DAG.getTargetGlobalAddress(
4814 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_PAGEOFF);
4815 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4816 Var,
4817 DAG.getTargetConstant(0, DL, MVT::i32)),
4818 0);
4819 }
4820
4821 case 24: {
4822 // mrs x0, TPIDR_EL0
4823 // add x0, x0, :tprel_hi12:a
4824 // add x0, x0, :tprel_lo12_nc:a
4825 SDValue HiVar = DAG.getTargetGlobalAddress(
4826 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4827 SDValue LoVar = DAG.getTargetGlobalAddress(
4828 GV, DL, PtrVT, 0,
4829 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4830 Addr = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4831 HiVar,
4832 DAG.getTargetConstant(0, DL, MVT::i32)),
4833 0);
4834 return SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, Addr,
4835 LoVar,
4836 DAG.getTargetConstant(0, DL, MVT::i32)),
4837 0);
4838 }
4839
4840 case 32: {
4841 // mrs x1, TPIDR_EL0
4842 // movz x0, #:tprel_g1:a
4843 // movk x0, #:tprel_g0_nc:a
4844 // add x0, x1, x0
4845 SDValue HiVar = DAG.getTargetGlobalAddress(
4846 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G1);
4847 SDValue LoVar = DAG.getTargetGlobalAddress(
4848 GV, DL, PtrVT, 0,
4849 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
4850 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
4851 DAG.getTargetConstant(16, DL, MVT::i32)),
4852 0);
4853 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
4854 DAG.getTargetConstant(0, DL, MVT::i32)),
4855 0);
4856 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4857 }
4858
4859 case 48: {
4860 // mrs x1, TPIDR_EL0
4861 // movz x0, #:tprel_g2:a
4862 // movk x0, #:tprel_g1_nc:a
4863 // movk x0, #:tprel_g0_nc:a
4864 // add x0, x1, x0
4865 SDValue HiVar = DAG.getTargetGlobalAddress(
4866 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_G2);
4867 SDValue MiVar = DAG.getTargetGlobalAddress(
4868 GV, DL, PtrVT, 0,
4869 AArch64II::MO_TLS | AArch64II::MO_G1 | AArch64II::MO_NC);
4870 SDValue LoVar = DAG.getTargetGlobalAddress(
4871 GV, DL, PtrVT, 0,
4872 AArch64II::MO_TLS | AArch64II::MO_G0 | AArch64II::MO_NC);
4873 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZXi, DL, PtrVT, HiVar,
4874 DAG.getTargetConstant(32, DL, MVT::i32)),
4875 0);
4876 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, MiVar,
4877 DAG.getTargetConstant(16, DL, MVT::i32)),
4878 0);
4879 TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKXi, DL, PtrVT, TPOff, LoVar,
4880 DAG.getTargetConstant(0, DL, MVT::i32)),
4881 0);
4882 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4883 }
4884 }
4885}
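
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] How a thread-
// pointer offset is split across the relocation pieces used above: the 24-bit
// case adds a :tprel_hi12: and a :tprel_lo12_nc: piece, while the 32/48-bit
// cases rebuild the offset from 16-bit MOVZ/MOVK chunks. Illustrative only.
#include <cstdint>

constexpr uint64_t TPRelHi12(uint64_t Off) { return (Off >> 12) & 0xfff; }
constexpr uint64_t TPRelLo12(uint64_t Off) { return Off & 0xfff; }
constexpr uint64_t TPRelG1(uint64_t Off) { return (Off >> 16) & 0xffff; }
constexpr uint64_t TPRelG0(uint64_t Off) { return Off & 0xffff; }

// add x0, x0, :tprel_hi12:a ; add x0, x0, :tprel_lo12_nc:a
static_assert(((TPRelHi12(0x345678) << 12) + TPRelLo12(0x345678)) == 0x345678,
              "two 12-bit adds reassemble a 24-bit offset");
// movz x0, #:tprel_g1:a ; movk x0, #:tprel_g0_nc:a
static_assert(((TPRelG1(0x12345678) << 16) | TPRelG0(0x12345678)) == 0x12345678,
              "movz/movk reassemble a 32-bit offset");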
4886
4887/// When accessing thread-local variables under either the general-dynamic or
4888/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4889/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4890/// is a function pointer to carry out the resolution.
4891///
4892/// The sequence is:
4893/// adrp x0, :tlsdesc:var
4894/// ldr x1, [x0, #:tlsdesc_lo12:var]
4895/// add x0, x0, #:tlsdesc_lo12:var
4896/// .tlsdesccall var
4897/// blr x1
4898/// (TPIDR_EL0 offset now in x0)
4899///
4900/// The above sequence must be produced unscheduled so that the linker can
4901/// optimize/relax it.
4902/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) represents the whole
4903/// sequence and is expanded very late in the compilation flow, to ensure the
4904/// sequence is emitted exactly as above.
4905SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4906 const SDLoc &DL,
4907 SelectionDAG &DAG) const {
4908 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4909
4910 SDValue Chain = DAG.getEntryNode();
4911 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4912
4913 Chain =
4914 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4915 SDValue Glue = Chain.getValue(1);
4916
4917 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4918}
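
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] What the
// TLSDESC_CALLSEQ pseudo amounts to at run time, written as plain C++ so the
// data flow (x0 = &descriptor, x1 = resolver, return value = offset from
// TPIDR_EL0) is visible. Field names are illustrative assumptions.
struct TLSDescriptorSketch {
  unsigned long (*Resolver)(const TLSDescriptorSketch *); // loaded into x1
  unsigned long Arg;                                       // module/offset data
};

static inline char *ResolveELFTLS(const TLSDescriptorSketch *Desc,
                                  char *ThreadPointer /* TPIDR_EL0 */) {
  // "blr x1" returns the offset from the thread pointer in x0; adding it to
  // TPIDR_EL0 yields the variable's address (done by the callers of this
  // lowering, see LowerELFGlobalTLSAddress below).
  return ThreadPointer + Desc->Resolver(Desc);
}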
4919
4920SDValue
4921AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4922 SelectionDAG &DAG) const {
4923  assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4924
4925 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4926
4927 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4928
4929 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4930 if (Model == TLSModel::LocalDynamic)
4931 Model = TLSModel::GeneralDynamic;
4932 }
4933
4934 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4935 Model != TLSModel::LocalExec)
4936 report_fatal_error("ELF TLS only supported in small memory model or "
4937 "in local exec TLS model");
4938 // Different choices can be made for the maximum size of the TLS area for a
4939 // module. For the small address model, the default TLS size is 16MiB and the
4940 // maximum TLS size is 4GiB.
4941 // FIXME: add tiny and large code model support for TLS access models other
4942 // than local exec. We currently generate the same code as small for tiny,
4943 // which may be larger than needed.
4944
4945 SDValue TPOff;
4946 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4947 SDLoc DL(Op);
4948 const GlobalValue *GV = GA->getGlobal();
4949
4950 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4951
4952 if (Model == TLSModel::LocalExec) {
4953 return LowerELFTLSLocalExec(GV, ThreadBase, DL, DAG);
4954 } else if (Model == TLSModel::InitialExec) {
4955 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4956 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4957 } else if (Model == TLSModel::LocalDynamic) {
4958 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
4959 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
4960 // the beginning of the module's TLS region, followed by a DTPREL offset
4961 // calculation.
4962
4963 // These accesses will need deduplicating if there's more than one.
4964 AArch64FunctionInfo *MFI =
4965 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4966 MFI->incNumLocalDynamicTLSAccesses();
4967
4968 // The call needs a relocation too for linker relaxation. It doesn't make
4969 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4970 // the address.
4971 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4972 AArch64II::MO_TLS);
4973
4974 // Now we can calculate the offset from TPIDR_EL0 to this module's
4975 // thread-local area.
4976 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4977
4978 // Now use :dtprel_whatever: operations to calculate this variable's offset
4979 // in its thread-storage area.
4980 SDValue HiVar = DAG.getTargetGlobalAddress(
4981 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4982 SDValue LoVar = DAG.getTargetGlobalAddress(
4983 GV, DL, MVT::i64, 0,
4984 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4985
4986 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4987 DAG.getTargetConstant(0, DL, MVT::i32)),
4988 0);
4989 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4990 DAG.getTargetConstant(0, DL, MVT::i32)),
4991 0);
4992 } else if (Model == TLSModel::GeneralDynamic) {
4993 // The call needs a relocation too for linker relaxation. It doesn't make
4994 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4995 // the address.
4996 SDValue SymAddr =
4997 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4998
4999 // Finally we can make a call to calculate the offset from tpidr_el0.
5000 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
5001 } else
5002    llvm_unreachable("Unsupported ELF TLS access model");
5003
5004 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
5005}
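
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] The local-dynamic
// arithmetic performed above, spelled out: one TLSDESC call against
// _TLS_MODULE_BASE_ yields the module's TLS base offset, and the per-variable
// :dtprel: pieces are then added as link-time constants. Parameter names are
// illustrative assumptions.
static inline char *LocalDynamicAddress(
    char *ThreadPointer,
    unsigned long ModuleBaseOffset, // result of the _TLS_MODULE_BASE_ call
    unsigned long DTPRelOffset) {   // :dtprel_hi12: + :dtprel_lo12_nc: pieces
  return ThreadPointer + ModuleBaseOffset + DTPRelOffset;
}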
5006
5007SDValue
5008AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
5009 SelectionDAG &DAG) const {
5010  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
5011
5012 SDValue Chain = DAG.getEntryNode();
5013 EVT PtrVT = getPointerTy(DAG.getDataLayout());
5014 SDLoc DL(Op);
5015
5016 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
5017
5018 // Load the ThreadLocalStoragePointer from the TEB
5019 // A pointer to the TLS array is located at offset 0x58 from the TEB.
5020 SDValue TLSArray =
5021 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
5022 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
5023 Chain = TLSArray.getValue(1);
5024
5025 // Load the TLS index from the C runtime;
5026 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
5027 // This also does the same as LOADgot, but using a generic i32 load,
5028 // while LOADgot only loads i64.
5029 SDValue TLSIndexHi =
5030 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
5031 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
5032 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
5033 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
5034 SDValue TLSIndex =
5035 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
5036 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
5037 Chain = TLSIndex.getValue(1);
5038
5039  // The pointer to the thread's TLS data area is the entry of the TLS array
5040  // selected by the TLS index; each slot is 8 bytes, hence the scale by 8.
5041 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
5042 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
5043 DAG.getConstant(3, DL, PtrVT));
5044 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
5045 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
5046 MachinePointerInfo());
5047 Chain = TLS.getValue(1);
5048
5049 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
5050 const GlobalValue *GV = GA->getGlobal();
5051 SDValue TGAHi = DAG.getTargetGlobalAddress(
5052 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
5053 SDValue TGALo = DAG.getTargetGlobalAddress(
5054 GV, DL, PtrVT, 0,
5055 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
5056
5057 // Add the offset from the start of the .tls section (section base).
5058 SDValue Addr =
5059 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
5060 DAG.getTargetConstant(0, DL, MVT::i32)),
5061 0);
5062 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
5063 return Addr;
5064}
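
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] The Windows TLS
// walk emitted above, expressed as pointer arithmetic: TEB + 0x58 holds the
// TLS array, _tls_index selects this module's 8-byte slot, and the variable's
// offset from the start of the .tls section is added last. Illustrative only.
#include <cstdint>

extern "C" uint32_t _tls_index; // provided by the MSVC C runtime

static inline char *WindowsTLSAddress(char *TEB, uint64_t SectionOffset) {
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x58); // ThreadLocalStoragePointer
  char *TLSData = TLSArray[_tls_index];                      // index scaled by 8 via pointer arithmetic
  return TLSData + SectionOffset;                            // TGAHi/TGALo offset
}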
5065
5066SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
5067 SelectionDAG &DAG) const {
5068 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
5069 if (DAG.getTarget().useEmulatedTLS())
5070 return LowerToTLSEmulatedModel(GA, DAG);
5071
5072 if (Subtarget->isTargetDarwin())
5073 return LowerDarwinGlobalTLSAddress(Op, DAG);
5074 if (Subtarget->isTargetELF())
5075 return LowerELFGlobalTLSAddress(Op, DAG);
5076 if (Subtarget->isTargetWindows())
5077 return LowerWindowsGlobalTLSAddress(Op, DAG);
5078
5079  llvm_unreachable("Unexpected platform trying to use TLS");
5080}
5081
5082SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
5083 SDValue Chain = Op.getOperand(0);
5084 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
5085 SDValue LHS = Op.getOperand(2);
5086 SDValue RHS = Op.getOperand(3);
5087 SDValue Dest = Op.getOperand(4);
5088 SDLoc dl(Op);
5089
5090 MachineFunction &MF = DAG.getMachineFunction();
5091 // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
5092 // will not be produced, as they are conditional branch instructions that do
5093 // not set flags.
5094 bool ProduceNonFlagSettingCondBr =
5095 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening);
5096
5097 // Handle f128 first, since lowering it will result in comparing the return
5098 // value of a libcall against zero, which is just what the rest of LowerBR_CC
5099 // is expecting to deal with.
5100 if (LHS.getValueType() == MVT::f128) {
5101 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5102
5103 // If softenSetCCOperands returned a scalar, we need to compare the result
5104 // against zero to select between true and false values.
5105 if (!RHS.getNode()) {
5106 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5107 CC = ISD::SETNE;
5108 }
5109 }
5110
5111 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
5112 // instruction.
5113 if (ISD::isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
5114 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
5115 // Only lower legal XALUO ops.
5116 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
5117 return SDValue();
5118
5119 // The actual operation with overflow check.
5120 AArch64CC::CondCode OFCC;
5121 SDValue Value, Overflow;
5122 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
5123
5124 if (CC == ISD::SETNE)
5125 OFCC = getInvertedCondCode(OFCC);
5126 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
5127
5128 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
5129 Overflow);
5130 }
5131
5132 if (LHS.getValueType().isInteger()) {
5133    assert((LHS.getValueType() == RHS.getValueType()) &&
5134           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
5135
5136 // If the RHS of the comparison is zero, we can potentially fold this
5137 // to a specialized branch.
5138 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
5139 if (RHSC && RHSC->getZExtValue() == 0 && ProduceNonFlagSettingCondBr) {
5140 if (CC == ISD::SETEQ) {
5141 // See if we can use a TBZ to fold in an AND as well.
5142 // TBZ has a smaller branch displacement than CBZ. If the offset is
5143 // out of bounds, a late MI-layer pass rewrites branches.
5144 // 403.gcc is an example that hits this case.
5145 if (LHS.getOpcode() == ISD::AND &&
5146 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5147 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
5148 SDValue Test = LHS.getOperand(0);
5149 uint64_t Mask = LHS.getConstantOperandVal(1);
5150 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
5151 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
5152 Dest);
5153 }
5154
5155 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
5156 } else if (CC == ISD::SETNE) {
5157 // See if we can use a TBZ to fold in an AND as well.
5158 // TBZ has a smaller branch displacement than CBZ. If the offset is
5159 // out of bounds, a late MI-layer pass rewrites branches.
5160 // 403.gcc is an example that hits this case.
5161 if (LHS.getOpcode() == ISD::AND &&
5162 isa<ConstantSDNode>(LHS.getOperand(1)) &&
5163 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
5164 SDValue Test = LHS.getOperand(0);
5165 uint64_t Mask = LHS.getConstantOperandVal(1);
5166 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
5167 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
5168 Dest);
5169 }
5170
5171 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
5172 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
5173 // Don't combine AND since emitComparison converts the AND to an ANDS
5174 // (a.k.a. TST) and the test in the test bit and branch instruction
5175 // becomes redundant. This would also increase register pressure.
5176 uint64_t Mask = LHS.getValueSizeInBits() - 1;
5177 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
5178 DAG.getConstant(Mask, dl, MVT::i64), Dest);
5179 }
5180 }
5181 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
5182 LHS.getOpcode() != ISD::AND && ProduceNonFlagSettingCondBr) {
5183 // Don't combine AND since emitComparison converts the AND to an ANDS
5184 // (a.k.a. TST) and the test in the test bit and branch instruction
5185 // becomes redundant. This would also increase register pressure.
5186 uint64_t Mask = LHS.getValueSizeInBits() - 1;
5187 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
5188 DAG.getConstant(Mask, dl, MVT::i64), Dest);
5189 }
5190
5191 SDValue CCVal;
5192 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
5193 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
5194 Cmp);
5195 }
5196
5197  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5198         LHS.getValueType() == MVT::f64);
5199
5200 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
5201 // clean. Some of them require two branches to implement.
5202 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5203 AArch64CC::CondCode CC1, CC2;
5204 changeFPCCToAArch64CC(CC, CC1, CC2);
5205 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5206 SDValue BR1 =
5207 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
5208 if (CC2 != AArch64CC::AL) {
5209 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5210 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
5211 Cmp);
5212 }
5213
5214 return BR1;
5215}
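
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] The bit
// manipulations behind the TB(N)Z folds above: a power-of-two AND mask tests
// exactly one bit (bit log2(Mask)), and the SETLT-against-0 / SETGT-against--1
// cases test the sign bit (bit width - 1). Illustrative only.
#include <cstdint>

constexpr bool IsSingleBitMask(uint64_t Mask) {
  return Mask != 0 && (Mask & (Mask - 1)) == 0;
}
constexpr unsigned TestedBit(uint64_t Mask) {
  unsigned Bit = 0;
  while (Mask >>= 1)
    ++Bit;
  return Bit;
}

// (x & 0x40) == 0  ->  tbz x, #6, dest
static_assert(IsSingleBitMask(0x40) && TestedBit(0x40) == 6, "single-bit AND");
// (i64)x < 0       ->  tbnz x, #63, dest   (sign bit is bit width - 1)
static_assert(TestedBit(1ULL << 63) == 63, "sign-bit test");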
5216
5217SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
5218 SelectionDAG &DAG) const {
5219 EVT VT = Op.getValueType();
5220 SDLoc DL(Op);
5221
5222 SDValue In1 = Op.getOperand(0);
5223 SDValue In2 = Op.getOperand(1);
5224 EVT SrcVT = In2.getValueType();
5225
5226 if (SrcVT.bitsLT(VT))
5227 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
5228 else if (SrcVT.bitsGT(VT))
5229 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
5230
5231 EVT VecVT;
5232 uint64_t EltMask;
5233 SDValue VecVal1, VecVal2;
5234
5235 auto setVecVal = [&] (int Idx) {
5236 if (!VT.isVector()) {
5237 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
5238 DAG.getUNDEF(VecVT), In1);
5239 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
5240 DAG.getUNDEF(VecVT), In2);
5241 } else {
5242 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
5243 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
5244 }
5245 };
5246
5247 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
5248 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
5249 EltMask = 0x80000000ULL;
5250 setVecVal(AArch64::ssub);
5251 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
5252 VecVT = MVT::v2i64;
5253
5254 // We want to materialize a mask with the high bit set, but the AdvSIMD
5255 // immediate moves cannot materialize that in a single instruction for
5256 // 64-bit elements. Instead, materialize zero and then negate it.
5257 EltMask = 0;
5258
5259 setVecVal(AArch64::dsub);
5260 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
5261 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
5262 EltMask = 0x8000ULL;
5263 setVecVal(AArch64::hsub);
5264 } else {
5265    llvm_unreachable("Invalid type for copysign!");
5266 }
5267
5268 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
5269
5270 // If we couldn't materialize the mask above, then the mask vector will be
5271 // the zero vector, and we need to negate it here.
5272 if (VT == MVT::f64 || VT == MVT::v2f64) {
5273 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
5274 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
5275 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
5276 }
5277
5278 SDValue Sel =
5279 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
5280
5281 if (VT == MVT::f16)
5282 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
5283 if (VT == MVT::f32)
5284 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
5285 else if (VT == MVT::f64)
5286 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
5287 else
5288 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
5289}
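
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] Why negating a
// zero vector yields the 64-bit sign mask used above: -0.0 differs from +0.0
// only in the sign bit, so FNEG of an all-zero lane produces
// 0x8000000000000000 per element. Illustrative only.
#include <cstdint>
#include <cstring>

inline uint64_t SignMaskViaFNegOfZero() {
  double NegZero = -0.0; // FNEG of +0.0
  uint64_t Bits;
  std::memcpy(&Bits, &NegZero, sizeof(Bits)); // bit pattern 0x8000000000000000
  return Bits;
}
// Usage: SignMaskViaFNegOfZero() == 0x8000000000000000ULL.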
5290
5291SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
5292 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
5293 Attribute::NoImplicitFloat))
5294 return SDValue();
5295
5296 if (!Subtarget->hasNEON())
5297 return SDValue();
5298
5299 // While there is no integer popcount instruction, it can
5300 // be more efficiently lowered to the following sequence that uses
5301 // AdvSIMD registers/instructions as long as the copies to/from
5302 // the AdvSIMD registers are cheap.
5303 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
5304 // CNT V0.8B, V0.8B // 8xbyte pop-counts
5305 // ADDV B0, V0.8B // sum 8xbyte pop-counts
5306 // UMOV X0, V0.B[0] // copy byte result back to integer reg
5307 SDValue Val = Op.getOperand(0);
5308 SDLoc DL(Op);
5309 EVT VT = Op.getValueType();
5310
5311 if (VT == MVT::i32 || VT == MVT::i64) {
5312 if (VT == MVT::i32)
5313 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
5314 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
5315
5316 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
5317 SDValue UaddLV = DAG.getNode(
5318 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
5319 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
5320
5321 if (VT == MVT::i64)
5322 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
5323 return UaddLV;
5324 }
5325
5326  assert((VT == MVT::v1i64 || VT == MVT::v2i64 || VT == MVT::v2i32 ||
5327          VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) &&
5328         "Unexpected type for custom ctpop lowering");
5329
5330 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5331 Val = DAG.getBitcast(VT8Bit, Val);
5332 Val = DAG.getNode(ISD::CTPOP, DL, VT8Bit, Val);
5333
5334 // Widen v8i8/v16i8 CTPOP result to VT by repeatedly widening pairwise adds.
5335 unsigned EltSize = 8;
5336 unsigned NumElts = VT.is64BitVector() ? 8 : 16;
5337 while (EltSize != VT.getScalarSizeInBits()) {
5338 EltSize *= 2;
5339 NumElts /= 2;
5340 MVT WidenVT = MVT::getVectorVT(MVT::getIntegerVT(EltSize), NumElts);
5341 Val = DAG.getNode(
5342 ISD::INTRINSIC_WO_CHAIN, DL, WidenVT,
5343 DAG.getConstant(Intrinsic::aarch64_neon_uaddlp, DL, MVT::i32), Val);
5344 }
5345
5346 return Val;
5347}
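
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] A scalar
// reference for what the CNT + UADDLV (and the pairwise UADDLP widening)
// sequences compute, handy for cross-checking the lowering. Illustrative only.
#include <cstdint>

constexpr unsigned PopCount64(uint64_t V) {
  unsigned N = 0;
  for (; V; V &= V - 1) // Kernighan's trick: clear the lowest set bit
    ++N;
  return N;
}

static_assert(PopCount64(0) == 0, "no bits set");
static_assert(PopCount64(0xF0F0F0F0F0F0F0F0ULL) == 32, "half the bits set");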
5348
5349SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
5350
5351 if (Op.getValueType().isVector())
5352 return LowerVSETCC(Op, DAG);
5353
5354 bool IsStrict = Op->isStrictFPOpcode();
5355 bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
5356 unsigned OpNo = IsStrict ? 1 : 0;
5357 SDValue Chain;
5358 if (IsStrict)
5359 Chain = Op.getOperand(0);
5360 SDValue LHS = Op.getOperand(OpNo + 0);
5361 SDValue RHS = Op.getOperand(OpNo + 1);
5362 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
5363 SDLoc dl(Op);
5364
5365 // We chose ZeroOrOneBooleanContents, so use zero and one.
5366 EVT VT = Op.getValueType();
5367 SDValue TVal = DAG.getConstant(1, dl, VT);
5368 SDValue FVal = DAG.getConstant(0, dl, VT);
5369
5370 // Handle f128 first, since one possible outcome is a normal integer
5371 // comparison which gets picked up by the next if statement.
5372 if (LHS.getValueType() == MVT::f128) {
5373 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
5374 IsSignaling);
5375
5376 // If softenSetCCOperands returned a scalar, use it.
5377 if (!RHS.getNode()) {
5378      assert(LHS.getValueType() == Op.getValueType() &&
5379             "Unexpected setcc expansion!");
5380 return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
5381 }
5382 }
5383
5384 if (LHS.getValueType().isInteger()) {
5385 SDValue CCVal;
5386 SDValue Cmp = getAArch64Cmp(
5387 LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
5388
5389 // Note that we inverted the condition above, so we reverse the order of
5390 // the true and false operands here. This will allow the setcc to be
5391 // matched to a single CSINC instruction.
5392 SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
5393 return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
5394 }
5395
5396 // Now we know we're dealing with FP values.
5397  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5398         LHS.getValueType() == MVT::f64);
5399
5400 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
5401 // and do the comparison.
5402 SDValue Cmp;
5403 if (IsStrict)
5404 Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
5405 else
5406 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5407
5408 AArch64CC::CondCode CC1, CC2;
5409 changeFPCCToAArch64CC(CC, CC1, CC2);
5410 SDValue Res;
5411 if (CC2 == AArch64CC::AL) {
5412 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
5413 CC2);
5414 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5415
5416 // Note that we inverted the condition above, so we reverse the order of
5417 // the true and false operands here. This will allow the setcc to be
5418 // matched to a single CSINC instruction.
5419 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
5420 } else {
5421 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
5422 // totally clean. Some of them require two CSELs to implement. As is in
5423 // this case, we emit the first CSEL and then emit a second using the output
5424 // of the first as the RHS. We're effectively OR'ing the two CC's together.
5425
5426 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
5427 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5428 SDValue CS1 =
5429 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5430
5431 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5432 Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5433 }
5434 return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
5435}
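
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] CSINC semantics,
// to show why the CSEL(0, 1, inverted-cc) built above matches a single
// "csinc w0, wzr, wzr, <inverted cc>": with both sources tied to zero, the
// inverted condition failing (i.e. the original condition holding) returns
// 0 + 1. Illustrative only.
constexpr unsigned CSInc(bool Cond, unsigned Rn, unsigned Rm) {
  return Cond ? Rn : Rm + 1; // CSINC: Rn if the condition holds, else Rm + 1
}

static_assert(CSInc(/*inverted cc holds=*/false, 0, 0) == 1,
              "original condition true  -> setcc result 1");
static_assert(CSInc(/*inverted cc holds=*/true, 0, 0) == 0,
              "original condition false -> setcc result 0");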
5436
5437SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
5438 SDValue RHS, SDValue TVal,
5439 SDValue FVal, const SDLoc &dl,
5440 SelectionDAG &DAG) const {
5441 // Handle f128 first, because it will result in a comparison of some RTLIB
5442 // call result against zero.
5443 if (LHS.getValueType() == MVT::f128) {
5444 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
5445
5446 // If softenSetCCOperands returned a scalar, we need to compare the result
5447 // against zero to select between true and false values.
5448 if (!RHS.getNode()) {
5449 RHS = DAG.getConstant(0, dl, LHS.getValueType());
5450 CC = ISD::SETNE;
5451 }
5452 }
5453
5454 // Also handle f16, for which we need to do a f32 comparison.
5455 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
5456 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
5457 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
5458 }
5459
5460 // Next, handle integers.
5461 if (LHS.getValueType().isInteger()) {
5462    assert((LHS.getValueType() == RHS.getValueType()) &&
5463           (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
5464
5465 unsigned Opcode = AArch64ISD::CSEL;
5466
5467 // If both the TVal and the FVal are constants, see if we can swap them in
5468    // order to form a CSINV or CSINC out of them.
5469 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
5470 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
5471
5472 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
5473 std::swap(TVal, FVal);
5474 std::swap(CTVal, CFVal);
5475 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5476 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
5477 std::swap(TVal, FVal);
5478 std::swap(CTVal, CFVal);
5479 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5480 } else if (TVal.getOpcode() == ISD::XOR) {
5481 // If TVal is a NOT we want to swap TVal and FVal so that we can match
5482 // with a CSINV rather than a CSEL.
5483 if (isAllOnesConstant(TVal.getOperand(1))) {
5484 std::swap(TVal, FVal);
5485 std::swap(CTVal, CFVal);
5486 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5487 }
5488 } else if (TVal.getOpcode() == ISD::SUB) {
5489 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
5490 // that we can match with a CSNEG rather than a CSEL.
5491 if (isNullConstant(TVal.getOperand(0))) {
5492 std::swap(TVal, FVal);
5493 std::swap(CTVal, CFVal);
5494 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5495 }
5496 } else if (CTVal && CFVal) {
5497 const int64_t TrueVal = CTVal->getSExtValue();
5498 const int64_t FalseVal = CFVal->getSExtValue();
5499 bool Swap = false;
5500
5501 // If both TVal and FVal are constants, see if FVal is the
5502 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
5503 // instead of a CSEL in that case.
5504 if (TrueVal == ~FalseVal) {
5505 Opcode = AArch64ISD::CSINV;
5506 } else if (TrueVal == -FalseVal) {
5507 Opcode = AArch64ISD::CSNEG;
5508 } else if (TVal.getValueType() == MVT::i32) {
5509 // If our operands are only 32-bit wide, make sure we use 32-bit
5510 // arithmetic for the check whether we can use CSINC. This ensures that
5511 // the addition in the check will wrap around properly in case there is
5512 // an overflow (which would not be the case if we do the check with
5513 // 64-bit arithmetic).
5514 const uint32_t TrueVal32 = CTVal->getZExtValue();
5515 const uint32_t FalseVal32 = CFVal->getZExtValue();
5516
5517 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
5518 Opcode = AArch64ISD::CSINC;
5519
5520 if (TrueVal32 > FalseVal32) {
5521 Swap = true;
5522 }
5523 }
5524 // 64-bit check whether we can use CSINC.
5525 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
5526 Opcode = AArch64ISD::CSINC;
5527
5528 if (TrueVal > FalseVal) {
5529 Swap = true;
5530 }
5531 }
5532
5533 // Swap TVal and FVal if necessary.
5534 if (Swap) {
5535 std::swap(TVal, FVal);
5536 std::swap(CTVal, CFVal);
5537 CC = ISD::getSetCCInverse(CC, LHS.getValueType());
5538 }
5539
5540 if (Opcode != AArch64ISD::CSEL) {
5541 // Drop FVal since we can get its value by simply inverting/negating
5542 // TVal.
5543 FVal = TVal;
5544 }
5545 }
5546
5547 // Avoid materializing a constant when possible by reusing a known value in
5548 // a register. However, don't perform this optimization if the known value
5549 // is one, zero or negative one in the case of a CSEL. We can always
5550 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
5551 // FVal, respectively.
5552 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
5553 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
5554 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
5555 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5556 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
5557 // "a != C ? x : a" to avoid materializing C.
5558 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
5559 TVal = LHS;
5560 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
5561 FVal = LHS;
5562 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
5563      assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
5564 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
5565 // avoid materializing C.
5566 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
5567 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
5568 Opcode = AArch64ISD::CSINV;
5569 TVal = LHS;
5570 FVal = DAG.getConstant(0, dl, FVal.getValueType());
5571 }
5572 }
5573
5574 SDValue CCVal;
5575 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
5576 EVT VT = TVal.getValueType();
5577 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
5578 }
5579
5580 // Now we know we're dealing with FP values.
5581  assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
5582         LHS.getValueType() == MVT::f64);
5583  assert(LHS.getValueType() == RHS.getValueType());
5584 EVT VT = TVal.getValueType();
5585 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
5586
5587 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
5588 // clean. Some of them require two CSELs to implement.
5589 AArch64CC::CondCode CC1, CC2;
5590 changeFPCCToAArch64CC(CC, CC1, CC2);
5591
5592 if (DAG.getTarget().Options.UnsafeFPMath) {
5593 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
5594 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
5595 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
5596 if (RHSVal && RHSVal->isZero()) {
5597 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
5598 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
5599
5600 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
5601 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
5602 TVal = LHS;
5603 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
5604 CFVal && CFVal->isZero() &&
5605 FVal.getValueType() == LHS.getValueType())
5606 FVal = LHS;
5607 }
5608 }
5609
5610 // Emit first, and possibly only, CSEL.
5611 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
5612 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
5613
5614 // If we need a second CSEL, emit it, using the output of the first as the
5615 // RHS. We're effectively OR'ing the two CC's together.
5616 if (CC2 != AArch64CC::AL) {
5617 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
5618 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
5619 }
5620
5621 // Otherwise, return the output of the first CSEL.
5622 return CS1;
5623}
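
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] The constant
// identities behind choosing CSINV/CSNEG/CSINC over a plain CSEL above, plus
// the reason the CSINC check uses 32-bit arithmetic for i32 operands.
// Illustrative only.
#include <cstdint>

static_assert(~static_cast<int64_t>(5) == -6, "TVal == ~FVal     -> CSINV");
static_assert(-static_cast<int64_t>(-7) == 7, "TVal == -FVal     -> CSNEG");
static_assert(static_cast<int64_t>(4) == 3 + 1, "TVal == FVal + 1 -> CSINC");
// i32 boundary case: as 32-bit values 0x80000000 == 0x7fffffff + 1, but the
// sign-extended 64-bit values (-2^31 vs 2^31) would not match, which is why
// the check is performed at 32-bit width.
static_assert(0x7fffffffu + 1 == 0x80000000u, "32-bit check finds the CSINC");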
5624
5625SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
5626 SelectionDAG &DAG) const {
5627 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
5628 SDValue LHS = Op.getOperand(0);
5629 SDValue RHS = Op.getOperand(1);
5630 SDValue TVal = Op.getOperand(2);
5631 SDValue FVal = Op.getOperand(3);
5632 SDLoc DL(Op);
5633 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5634}
5635
5636SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
5637 SelectionDAG &DAG) const {
5638 SDValue CCVal = Op->getOperand(0);
5639 SDValue TVal = Op->getOperand(1);
5640 SDValue FVal = Op->getOperand(2);
5641 SDLoc DL(Op);
5642
5643 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
5644 // instruction.
5645 if (ISD::isOverflowIntrOpRes(CCVal)) {
5646 // Only lower legal XALUO ops.
5647 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
5648 return SDValue();
5649
5650 AArch64CC::CondCode OFCC;
5651 SDValue Value, Overflow;
5652 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
5653 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
5654
5655 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
5656 CCVal, Overflow);
5657 }
5658
5659 // Lower it the same way as we would lower a SELECT_CC node.
5660 ISD::CondCode CC;
5661 SDValue LHS, RHS;
5662 if (CCVal.getOpcode() == ISD::SETCC) {
5663 LHS = CCVal.getOperand(0);
5664 RHS = CCVal.getOperand(1);
5665 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
5666 } else {
5667 LHS = CCVal;
5668 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
5669 CC = ISD::SETNE;
5670 }
5671 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
5672}
5673
5674SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
5675 SelectionDAG &DAG) const {
5676  // Jump table entries are encoded as PC-relative offsets, so no additional
5677  // tweaking is necessary here; just get the address of the jump table.
5678 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
5679
5680 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5681 !Subtarget->isTargetMachO()) {
5682 return getAddrLarge(JT, DAG);
5683 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5684 return getAddrTiny(JT, DAG);
5685 }
5686 return getAddr(JT, DAG);
5687}
5688
5689SDValue AArch64TargetLowering::LowerBR_JT(SDValue Op,
5690 SelectionDAG &DAG) const {
5691  // Jump table entries are encoded as PC-relative offsets, so no additional
5692  // tweaking is necessary here; just get the address of the jump table.
5693 SDLoc DL(Op);
5694 SDValue JT = Op.getOperand(1);
5695 SDValue Entry = Op.getOperand(2);
5696 int JTI = cast<JumpTableSDNode>(JT.getNode())->getIndex();
5697
5698 SDNode *Dest =
5699 DAG.getMachineNode(AArch64::JumpTableDest32, DL, MVT::i64, MVT::i64, JT,
5700 Entry, DAG.getTargetJumpTable(JTI, MVT::i32));
5701 return DAG.getNode(ISD::BRIND, DL, MVT::Other, Op.getOperand(0),
5702 SDValue(Dest, 0));
5703}
5704
5705SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
5706 SelectionDAG &DAG) const {
5707 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
5708
5709 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
5710 // Use the GOT for the large code model on iOS.
5711 if (Subtarget->isTargetMachO()) {
5712 return getGOT(CP, DAG);
5713 }
5714 return getAddrLarge(CP, DAG);
5715 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5716 return getAddrTiny(CP, DAG);
5717 } else {
5718 return getAddr(CP, DAG);
5719 }
5720}
5721
5722SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
5723 SelectionDAG &DAG) const {
5724 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
5725 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
5726 !Subtarget->isTargetMachO()) {
5727 return getAddrLarge(BA, DAG);
5728 } else if (getTargetMachine().getCodeModel() == CodeModel::Tiny) {
5729 return getAddrTiny(BA, DAG);
5730 }
5731 return getAddr(BA, DAG);
5732}
5733
5734SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
5735 SelectionDAG &DAG) const {
5736 AArch64FunctionInfo *FuncInfo =
5737 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5738
5739 SDLoc DL(Op);
5740 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
5741 getPointerTy(DAG.getDataLayout()));
5742 FR = DAG.getZExtOrTrunc(FR, DL, getPointerMemTy(DAG.getDataLayout()));
5743 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5744 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5745 MachinePointerInfo(SV));
5746}
5747
5748SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
5749 SelectionDAG &DAG) const {
5750 AArch64FunctionInfo *FuncInfo =
5751 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
5752
5753 SDLoc DL(Op);
5754 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
5755 ? FuncInfo->getVarArgsGPRIndex()
5756 : FuncInfo->getVarArgsStackIndex(),
5757 getPointerTy(DAG.getDataLayout()));
5758 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5759 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
5760 MachinePointerInfo(SV));
5761}
5762
5763SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
5764 SelectionDAG &DAG) const {
5765 // The layout of the va_list struct is specified in the AArch64 Procedure Call
5766 // Standard, section B.3.
5767 MachineFunction &MF = DAG.getMachineFunction();
5768 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
5769 auto PtrVT = getPointerTy(DAG.getDataLayout());
5770 SDLoc DL(Op);
5771
5772 SDValue Chain = Op.getOperand(0);
5773 SDValue VAList = Op.getOperand(1);
5774 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5775 SmallVector<SDValue, 4> MemOps;
5776
5777 // void *__stack at offset 0
5778 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
5779 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
5780 MachinePointerInfo(SV), /* Alignment = */ 8));
5781
5782 // void *__gr_top at offset 8
5783 int GPRSize = FuncInfo->getVarArgsGPRSize();
5784 if (GPRSize > 0) {
5785 SDValue GRTop, GRTopAddr;
5786
5787 GRTopAddr =
5788 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
5789
5790 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
5791 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
5792 DAG.getConstant(GPRSize, DL, PtrVT));
5793
5794 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
5795 MachinePointerInfo(SV, 8),
5796 /* Alignment = */ 8));
5797 }
5798
5799 // void *__vr_top at offset 16
5800 int FPRSize = FuncInfo->getVarArgsFPRSize();
5801 if (FPRSize > 0) {
5802 SDValue VRTop, VRTopAddr;
5803 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5804 DAG.getConstant(16, DL, PtrVT));
5805
5806 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
5807 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
5808 DAG.getConstant(FPRSize, DL, PtrVT));
5809
5810 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
5811 MachinePointerInfo(SV, 16),
5812 /* Alignment = */ 8));
5813 }
5814
5815 // int __gr_offs at offset 24
5816 SDValue GROffsAddr =
5817 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
5818 MemOps.push_back(DAG.getStore(
5819 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
5820 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
5821
5822 // int __vr_offs at offset 28
5823 SDValue VROffsAddr =
5824 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
5825 MemOps.push_back(DAG.getStore(
5826 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
5827 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
5828
5829 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
5830}
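
// [Editorial sketch -- not part of AArch64ISelLowering.cpp.] The AAPCS64
// va_list layout the five stores above populate, with the same field offsets
// (0, 8, 16, 24, 28) and the 32-byte total size LowerVACOPY copies below,
// assuming 8-byte pointers. The struct name is illustrative; the field roles
// follow AAPCS64 section B.3.
#include <cstddef>

struct AAPCS64VaListSketch {
  void *Stack; // offset 0:  next stacked argument
  void *GRTop; // offset 8:  end of the saved general-register area
  void *VRTop; // offset 16: end of the saved FP/SIMD-register area
  int GROffs;  // offset 24: negative byte count of the GP save area still unread
  int VROffs;  // offset 28: negative byte count of the FP/SIMD save area still unread
};

static_assert(offsetof(AAPCS64VaListSketch, GROffs) == 24, "matches the __gr_offs store above");
static_assert(sizeof(AAPCS64VaListSketch) == 32, "matches LowerVACOPY's 32-byte copy");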
5831
5832SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
5833 SelectionDAG &DAG) const {
5834 MachineFunction &MF = DAG.getMachineFunction();
5835
5836 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
5837 return LowerWin64_VASTART(Op, DAG);
5838 else if (Subtarget->isTargetDarwin())
5839 return LowerDarwin_VASTART(Op, DAG);
5840 else
5841 return LowerAAPCS_VASTART(Op, DAG);
5842}
5843
5844SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
5845 SelectionDAG &DAG) const {
5846 // AAPCS has three pointers and two ints (= 32 bytes), Darwin has single
5847 // pointer.
5848 SDLoc DL(Op);
5849 unsigned PtrSize = Subtarget->isTargetILP32() ? 4 : 8;
5850 unsigned VaListSize = (Subtarget->isTargetDarwin() ||
5851 Subtarget->isTargetWindows()) ? PtrSize : 32;
5852 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
5853 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
5854
5855 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1), Op.getOperand(2),
5856 DAG.getConstant(VaListSize, DL, MVT::i32),
5857 Align(PtrSize), false, false, false,
5858 MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
5859}
5860
5861SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
5862  assert(Subtarget->isTargetDarwin() &&
5863         "automatic va_arg instruction only works on Darwin");
5864
5865 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
5866 EVT VT = Op.getValueType();
5867 SDLoc DL(Op);
5868 SDValue Chain = Op.getOperand(0);
5869 SDValue Addr = Op.getOperand(1);
5870 unsigned Align = Op.getConstantOperandVal(3);
5871 unsigned MinSlotSize = Subtarget->isTargetILP32() ? 4 : 8;
5872 auto PtrVT = getPointerTy(DAG.getDataLayout());
5873 auto PtrMemVT = getPointerMemTy(DAG.getDataLayout());
5874 SDValue VAList =
5875 DAG.getLoad(PtrMemVT, DL, Chain, Addr, MachinePointerInfo(V));
5876 Chain = VAList.getValue(1);
5877 VAList = DAG.getZExtOrTrunc(VAList, DL, PtrVT);
5878
5879 if (Align > MinSlotSize) {
5880    assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
5881 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
5882 DAG.getConstant(Align - 1, DL, PtrVT));
5883 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
5884 DAG.getConstant(-(int64_t)Align, DL, PtrVT));