Bug Summary

File: lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 8598, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
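
In C and C++, shifting a value by an amount greater than or equal to the bit width of its (promoted) type is undefined behavior, so the analyzer flags any path on which this left shift's amount can reach 64, the width of unsigned long long on the x86_64 host triple above. A minimal sketch of the pattern and a typical guard follows; the names are hypothetical and not taken from the file below:

unsigned long long lowBitsMask(unsigned Width) {
  // Undefined when Width == 64: the shift amount equals the bit width
  // of unsigned long long, which is exactly what the checker reports.
  return (1ULL << Width) - 1;
}

unsigned long long lowBitsMaskSafe(unsigned Width) {
  // Special-casing the full-width shift keeps the result well defined.
  return Width >= 64 ? ~0ULL : (1ULL << Width) - 1;
}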

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn338205/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn338205/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/x86_64-linux-gnu/c++/8 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/8/../../../../include/c++/8/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/lib/gcc/x86_64-linux-gnu/8/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn338205/build-llvm/lib/Target/AArch64 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-07-29-043837-17923-1 -x c++ /build/llvm-toolchain-snapshot-7~svn338205/lib/Target/AArch64/AArch64ISelLowering.cpp -faddrsig
1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AArch64TargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64ISelLowering.h"
15#include "AArch64CallingConvention.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/RuntimeLibcalls.h"
42#include "llvm/CodeGen/SelectionDAG.h"
43#include "llvm/CodeGen/SelectionDAGNodes.h"
44#include "llvm/CodeGen/TargetCallingConv.h"
45#include "llvm/CodeGen/TargetInstrInfo.h"
46#include "llvm/CodeGen/ValueTypes.h"
47#include "llvm/IR/Attributes.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/DataLayout.h"
50#include "llvm/IR/DebugLoc.h"
51#include "llvm/IR/DerivedTypes.h"
52#include "llvm/IR/Function.h"
53#include "llvm/IR/GetElementPtrTypeIterator.h"
54#include "llvm/IR/GlobalValue.h"
55#include "llvm/IR/IRBuilder.h"
56#include "llvm/IR/Instruction.h"
57#include "llvm/IR/Instructions.h"
58#include "llvm/IR/Intrinsics.h"
59#include "llvm/IR/Module.h"
60#include "llvm/IR/OperandTraits.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/Value.h"
64#include "llvm/MC/MCRegisterInfo.h"
65#include "llvm/Support/Casting.h"
66#include "llvm/Support/CodeGen.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Compiler.h"
69#include "llvm/Support/Debug.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Support/KnownBits.h"
72#include "llvm/Support/MachineValueType.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Target/TargetMachine.h"
76#include "llvm/Target/TargetOptions.h"
77#include <algorithm>
78#include <bitset>
79#include <cassert>
80#include <cctype>
81#include <cstdint>
82#include <cstdlib>
83#include <iterator>
84#include <limits>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "aarch64-lower"
92
93STATISTIC(NumTailCalls, "Number of tail calls");
94STATISTIC(NumShiftInserts, "Number of vector shift inserts");
95STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96
97static cl::opt<bool>
98EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99 cl::desc("Allow AArch64 SLI/SRI formation"),
100 cl::init(false));
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116/// Value type used for condition codes.
117static const MVT MVT_CC = MVT::i32;
118
119AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
120 const AArch64Subtarget &STI)
121 : TargetLowering(TM), Subtarget(&STI) {
122 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123 // we have to make something up. Arbitrarily, choose ZeroOrOne.
124 setBooleanContents(ZeroOrOneBooleanContent);
125 // When comparing vectors the result sets the different elements in the
126 // vector to all-one or all-zero.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // Set up the register classes.
130 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132
133 if (Subtarget->hasFPARMv8()) {
134 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138 }
139
140 if (Subtarget->hasNEON()) {
141 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143 // Someone set us up the NEON.
144 addDRTypeForNEON(MVT::v2f32);
145 addDRTypeForNEON(MVT::v8i8);
146 addDRTypeForNEON(MVT::v4i16);
147 addDRTypeForNEON(MVT::v2i32);
148 addDRTypeForNEON(MVT::v1i64);
149 addDRTypeForNEON(MVT::v1f64);
150 addDRTypeForNEON(MVT::v4f16);
151
152 addQRTypeForNEON(MVT::v4f32);
153 addQRTypeForNEON(MVT::v2f64);
154 addQRTypeForNEON(MVT::v16i8);
155 addQRTypeForNEON(MVT::v8i16);
156 addQRTypeForNEON(MVT::v4i32);
157 addQRTypeForNEON(MVT::v2i64);
158 addQRTypeForNEON(MVT::v8f16);
159 }
160
161 // Compute derived properties from the register classes
162 computeRegisterProperties(Subtarget->getRegisterInfo());
163
164 // Provide all sorts of operation actions
165 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
166 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
167 setOperationAction(ISD::SETCC, MVT::i32, Custom);
168 setOperationAction(ISD::SETCC, MVT::i64, Custom);
169 setOperationAction(ISD::SETCC, MVT::f16, Custom);
170 setOperationAction(ISD::SETCC, MVT::f32, Custom);
171 setOperationAction(ISD::SETCC, MVT::f64, Custom);
172 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
173 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
174 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
175 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
176 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
177 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
178 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
179 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
180 setOperationAction(ISD::SELECT, MVT::i32, Custom);
181 setOperationAction(ISD::SELECT, MVT::i64, Custom);
182 setOperationAction(ISD::SELECT, MVT::f16, Custom);
183 setOperationAction(ISD::SELECT, MVT::f32, Custom);
184 setOperationAction(ISD::SELECT, MVT::f64, Custom);
185 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
186 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
188 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
189 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
190 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
191 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
192
193 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
194 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
195 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
196
197 setOperationAction(ISD::FREM, MVT::f32, Expand);
198 setOperationAction(ISD::FREM, MVT::f64, Expand);
199 setOperationAction(ISD::FREM, MVT::f80, Expand);
200
201 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
202
203 // Custom lowering hooks are needed for XOR
204 // to fold it into CSINC/CSINV.
205 setOperationAction(ISD::XOR, MVT::i32, Custom);
206 setOperationAction(ISD::XOR, MVT::i64, Custom);
207
208 // Virtually no operation on f128 is legal, but LLVM can't expand them when
209 // there's a valid register class, so we need custom operations in most cases.
210 setOperationAction(ISD::FABS, MVT::f128, Expand);
211 setOperationAction(ISD::FADD, MVT::f128, Custom);
212 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
213 setOperationAction(ISD::FCOS, MVT::f128, Expand);
214 setOperationAction(ISD::FDIV, MVT::f128, Custom);
215 setOperationAction(ISD::FMA, MVT::f128, Expand);
216 setOperationAction(ISD::FMUL, MVT::f128, Custom);
217 setOperationAction(ISD::FNEG, MVT::f128, Expand);
218 setOperationAction(ISD::FPOW, MVT::f128, Expand);
219 setOperationAction(ISD::FREM, MVT::f128, Expand);
220 setOperationAction(ISD::FRINT, MVT::f128, Expand);
221 setOperationAction(ISD::FSIN, MVT::f128, Expand);
222 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
223 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
224 setOperationAction(ISD::FSUB, MVT::f128, Custom);
225 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
226 setOperationAction(ISD::SETCC, MVT::f128, Custom);
227 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
228 setOperationAction(ISD::SELECT, MVT::f128, Custom);
229 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
230 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
231
232 // Lowering for many of the conversions is actually specified by the non-f128
233 // type. The LowerXXX function will be trivial when f128 isn't involved.
234 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
235 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
236 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
237 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
238 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
239 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
241 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
243 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
244 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
245 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
246 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
247 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
248
249 // Variable arguments.
250 setOperationAction(ISD::VASTART, MVT::Other, Custom);
251 setOperationAction(ISD::VAARG, MVT::Other, Custom);
252 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
253 setOperationAction(ISD::VAEND, MVT::Other, Expand);
254
255 // Variable-sized objects.
256 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
257 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
258
259 if (Subtarget->isTargetWindows())
260 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
261 else
262 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
263
264 // Constant pool entries
265 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
266
267 // BlockAddress
268 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
269
270 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
271 setOperationAction(ISD::ADDC, MVT::i32, Custom);
272 setOperationAction(ISD::ADDE, MVT::i32, Custom);
273 setOperationAction(ISD::SUBC, MVT::i32, Custom);
274 setOperationAction(ISD::SUBE, MVT::i32, Custom);
275 setOperationAction(ISD::ADDC, MVT::i64, Custom);
276 setOperationAction(ISD::ADDE, MVT::i64, Custom);
277 setOperationAction(ISD::SUBC, MVT::i64, Custom);
278 setOperationAction(ISD::SUBE, MVT::i64, Custom);
279
280 // AArch64 lacks both left-rotate and popcount instructions.
281 setOperationAction(ISD::ROTL, MVT::i32, Expand);
282 setOperationAction(ISD::ROTL, MVT::i64, Expand);
283 for (MVT VT : MVT::vector_valuetypes()) {
284 setOperationAction(ISD::ROTL, VT, Expand);
285 setOperationAction(ISD::ROTR, VT, Expand);
286 }
287
288 // AArch64 doesn't have {U|S}MUL_LOHI.
289 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
290 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
291
292 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
293 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
294
295 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
296 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
297 for (MVT VT : MVT::vector_valuetypes()) {
298 setOperationAction(ISD::SDIVREM, VT, Expand);
299 setOperationAction(ISD::UDIVREM, VT, Expand);
300 }
301 setOperationAction(ISD::SREM, MVT::i32, Expand);
302 setOperationAction(ISD::SREM, MVT::i64, Expand);
303 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
304 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
305 setOperationAction(ISD::UREM, MVT::i32, Expand);
306 setOperationAction(ISD::UREM, MVT::i64, Expand);
307
308 // Custom lower Add/Sub/Mul with overflow.
309 setOperationAction(ISD::SADDO, MVT::i32, Custom);
310 setOperationAction(ISD::SADDO, MVT::i64, Custom);
311 setOperationAction(ISD::UADDO, MVT::i32, Custom);
312 setOperationAction(ISD::UADDO, MVT::i64, Custom);
313 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
314 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
315 setOperationAction(ISD::USUBO, MVT::i32, Custom);
316 setOperationAction(ISD::USUBO, MVT::i64, Custom);
317 setOperationAction(ISD::SMULO, MVT::i32, Custom);
318 setOperationAction(ISD::SMULO, MVT::i64, Custom);
319 setOperationAction(ISD::UMULO, MVT::i32, Custom);
320 setOperationAction(ISD::UMULO, MVT::i64, Custom);
321
322 setOperationAction(ISD::FSIN, MVT::f32, Expand);
323 setOperationAction(ISD::FSIN, MVT::f64, Expand);
324 setOperationAction(ISD::FCOS, MVT::f32, Expand);
325 setOperationAction(ISD::FCOS, MVT::f64, Expand);
326 setOperationAction(ISD::FPOW, MVT::f32, Expand);
327 setOperationAction(ISD::FPOW, MVT::f64, Expand);
328 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
329 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
330 if (Subtarget->hasFullFP16())
331 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
332 else
333 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
334
335 setOperationAction(ISD::FREM, MVT::f16, Promote);
336 setOperationAction(ISD::FREM, MVT::v4f16, Promote);
337 setOperationAction(ISD::FREM, MVT::v8f16, Promote);
338 setOperationAction(ISD::FPOW, MVT::f16, Promote);
339 setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
340 setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
341 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
342 setOperationAction(ISD::FCOS, MVT::f16, Promote);
343 setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
344 setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
345 setOperationAction(ISD::FSIN, MVT::f16, Promote);
346 setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
347 setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
348 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
349 setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
350 setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
351 setOperationAction(ISD::FEXP, MVT::f16, Promote);
352 setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
353 setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
354 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
355 setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
356 setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
357 setOperationAction(ISD::FLOG, MVT::f16, Promote);
358 setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
359 setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
360 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
361 setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
362 setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
363 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
364 setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
365 setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
366
367 if (!Subtarget->hasFullFP16()) {
368 setOperationAction(ISD::SELECT, MVT::f16, Promote);
369 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
370 setOperationAction(ISD::SETCC, MVT::f16, Promote);
371 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
372 setOperationAction(ISD::FADD, MVT::f16, Promote);
373 setOperationAction(ISD::FSUB, MVT::f16, Promote);
374 setOperationAction(ISD::FMUL, MVT::f16, Promote);
375 setOperationAction(ISD::FDIV, MVT::f16, Promote);
376 setOperationAction(ISD::FMA, MVT::f16, Promote);
377 setOperationAction(ISD::FNEG, MVT::f16, Promote);
378 setOperationAction(ISD::FABS, MVT::f16, Promote);
379 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
380 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
381 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
382 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
383 setOperationAction(ISD::FRINT, MVT::f16, Promote);
384 setOperationAction(ISD::FROUND, MVT::f16, Promote);
385 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
386 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
387 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
388 setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
389 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
390
391 // promote v4f16 to v4f32 when that is known to be safe.
392 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
393 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
394 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
395 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
396 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
397 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
398 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
399 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
400 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
401 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
402 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
403 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
404
405 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
406 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
407 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
408 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
409 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
410 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
411 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
412 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
413 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
414 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
415 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
416 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
417 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
418 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
419 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
420
421 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
422 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
423 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
424 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
425 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
426 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
427 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
428 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
429 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
430 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
431 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
432 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
433 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
434 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
435 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
436 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
437 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
438 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
439 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
440 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
441 }
442
443 // AArch64 has implementations of a lot of rounding-like FP operations.
444 for (MVT Ty : {MVT::f32, MVT::f64}) {
445 setOperationAction(ISD::FFLOOR, Ty, Legal);
446 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
447 setOperationAction(ISD::FCEIL, Ty, Legal);
448 setOperationAction(ISD::FRINT, Ty, Legal);
449 setOperationAction(ISD::FTRUNC, Ty, Legal);
450 setOperationAction(ISD::FROUND, Ty, Legal);
451 setOperationAction(ISD::FMINNUM, Ty, Legal);
452 setOperationAction(ISD::FMAXNUM, Ty, Legal);
453 setOperationAction(ISD::FMINNAN, Ty, Legal);
454 setOperationAction(ISD::FMAXNAN, Ty, Legal);
455 }
456
457 if (Subtarget->hasFullFP16()) {
458 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
459 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
460 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
461 setOperationAction(ISD::FRINT, MVT::f16, Legal);
462 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
463 setOperationAction(ISD::FROUND, MVT::f16, Legal);
464 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
465 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
466 setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
467 setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
468 }
469
470 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
471
472 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
473
474 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
475 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
476 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
477 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
478 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
479
480 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
481 // This requires the Performance Monitors extension.
482 if (Subtarget->hasPerfMon())
483 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
484
485 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
486 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
487 // Issue __sincos_stret if available.
488 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
489 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
490 } else {
491 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
492 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
493 }
494
495 // Make floating-point constants legal for the large code model, so they don't
496 // become loads from the constant pool.
497 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
498 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
499 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
500 }
501
502 // AArch64 does not have floating-point extending loads, i1 sign-extending
503 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
504 for (MVT VT : MVT::fp_valuetypes()) {
505 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
506 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
507 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
508 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
509 }
510 for (MVT VT : MVT::integer_valuetypes())
511 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
512
513 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
514 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
515 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
516 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
517 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
518 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
519 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
520
521 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
522 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
523
524 // Indexed loads and stores are supported.
525 for (unsigned im = (unsigned)ISD::PRE_INC;
526 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
527 setIndexedLoadAction(im, MVT::i8, Legal);
528 setIndexedLoadAction(im, MVT::i16, Legal);
529 setIndexedLoadAction(im, MVT::i32, Legal);
530 setIndexedLoadAction(im, MVT::i64, Legal);
531 setIndexedLoadAction(im, MVT::f64, Legal);
532 setIndexedLoadAction(im, MVT::f32, Legal);
533 setIndexedLoadAction(im, MVT::f16, Legal);
534 setIndexedStoreAction(im, MVT::i8, Legal);
535 setIndexedStoreAction(im, MVT::i16, Legal);
536 setIndexedStoreAction(im, MVT::i32, Legal);
537 setIndexedStoreAction(im, MVT::i64, Legal);
538 setIndexedStoreAction(im, MVT::f64, Legal);
539 setIndexedStoreAction(im, MVT::f32, Legal);
540 setIndexedStoreAction(im, MVT::f16, Legal);
541 }
542
543 // Trap.
544 setOperationAction(ISD::TRAP, MVT::Other, Legal);
545
546 // We combine OR nodes for bitfield operations.
547 setTargetDAGCombine(ISD::OR);
548
549 // Vector add and sub nodes may conceal a high-half opportunity.
550 // Also, try to fold ADD into CSINC/CSINV.
551 setTargetDAGCombine(ISD::ADD);
552 setTargetDAGCombine(ISD::SUB);
553 setTargetDAGCombine(ISD::SRL);
554 setTargetDAGCombine(ISD::XOR);
555 setTargetDAGCombine(ISD::SINT_TO_FP);
556 setTargetDAGCombine(ISD::UINT_TO_FP);
557
558 setTargetDAGCombine(ISD::FP_TO_SINT);
559 setTargetDAGCombine(ISD::FP_TO_UINT);
560 setTargetDAGCombine(ISD::FDIV);
561
562 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
563
564 setTargetDAGCombine(ISD::ANY_EXTEND);
565 setTargetDAGCombine(ISD::ZERO_EXTEND);
566 setTargetDAGCombine(ISD::SIGN_EXTEND);
567 setTargetDAGCombine(ISD::BITCAST);
568 setTargetDAGCombine(ISD::CONCAT_VECTORS);
569 setTargetDAGCombine(ISD::STORE);
570 if (Subtarget->supportsAddressTopByteIgnored())
571 setTargetDAGCombine(ISD::LOAD);
572
573 setTargetDAGCombine(ISD::MUL);
574
575 setTargetDAGCombine(ISD::SELECT);
576 setTargetDAGCombine(ISD::VSELECT);
577
578 setTargetDAGCombine(ISD::INTRINSIC_VOID);
579 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
580 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
581
582 setTargetDAGCombine(ISD::GlobalAddress);
583
584 // In case of strict alignment, avoid an excessive number of byte wide stores.
585 MaxStoresPerMemsetOptSize = 8;
586 MaxStoresPerMemset = Subtarget->requiresStrictAlign()
587 ? MaxStoresPerMemsetOptSize : 32;
588
589 MaxGluedStoresPerMemcpy = 4;
590 MaxStoresPerMemcpyOptSize = 4;
591 MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
592 ? MaxStoresPerMemcpyOptSize : 16;
593
594 MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
595
596 setStackPointerRegisterToSaveRestore(AArch64::SP);
597
598 setSchedulingPreference(Sched::Hybrid);
599
600 EnableExtLdPromotion = true;
601
602 // Set required alignment.
603 setMinFunctionAlignment(2);
604 // Set preferred alignments.
605 setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
606 setPrefLoopAlignment(STI.getPrefLoopAlignment());
607
608 // Only change the limit for entries in a jump table if specified by
609 // the subtarget, but not at the command line.
610 unsigned MaxJT = STI.getMaximumJumpTableSize();
611 if (MaxJT && getMaximumJumpTableSize() == 0)
612 setMaximumJumpTableSize(MaxJT);
613
614 setHasExtractBitsInsn(true);
615
616 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
617
618 if (Subtarget->hasNEON()) {
619 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
620 // silliness like this:
621 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
622 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
623 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
624 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
625 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
626 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
627 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
628 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
629 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
630 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
631 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
632 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
633 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
634 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
635 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
636 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
637 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
638 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
639 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
640 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
641 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
642 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
643 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
644 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
645 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
646
647 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
648 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
649 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
650 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
651 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
652
653 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
654
655 // AArch64 doesn't have direct vector ->f32 conversion instructions for
656 // elements smaller than i32, so promote the input to i32 first.
657 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
658 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
659 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
660 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
661 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
662 // -> v8f16 conversions.
663 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
664 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
665 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
666 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
667 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
668 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
669 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
670 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
671 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
672 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
673 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
674 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
675 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
676
677 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
678 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
679
680 setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
681 setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
682 setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
683 setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
684 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
685 setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
686 setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
687 setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
688
689 // AArch64 doesn't have MUL.2d:
690 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
691 // Custom handling for some quad-vector types to detect MULL.
692 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
693 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
694 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
695
696 // Vector reductions
697 for (MVT VT : MVT::integer_valuetypes()) {
698 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
699 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
700 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
701 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
702 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
703 }
704 for (MVT VT : MVT::fp_valuetypes()) {
705 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
706 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
707 }
708
709 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
710 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
711 // Likewise, narrowing and extending vector loads/stores aren't handled
712 // directly.
713 for (MVT VT : MVT::vector_valuetypes()) {
714 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
715
716 if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32) {
717 setOperationAction(ISD::MULHS, VT, Custom);
718 setOperationAction(ISD::MULHU, VT, Custom);
719 } else {
720 setOperationAction(ISD::MULHS, VT, Expand);
721 setOperationAction(ISD::MULHU, VT, Expand);
722 }
723 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
724 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
725
726 setOperationAction(ISD::BSWAP, VT, Expand);
727
728 for (MVT InnerVT : MVT::vector_valuetypes()) {
729 setTruncStoreAction(VT, InnerVT, Expand);
730 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
731 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
732 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
733 }
734 }
735
736 // AArch64 has implementations of a lot of rounding-like FP operations.
737 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
738 setOperationAction(ISD::FFLOOR, Ty, Legal);
739 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
740 setOperationAction(ISD::FCEIL, Ty, Legal);
741 setOperationAction(ISD::FRINT, Ty, Legal);
742 setOperationAction(ISD::FTRUNC, Ty, Legal);
743 setOperationAction(ISD::FROUND, Ty, Legal);
744 }
745
746 setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
747 }
748
749 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
750}
751
752void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
753 assert(VT.isVector() && "VT should be a vector type");
754
755 if (VT.isFloatingPoint()) {
756 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
757 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
758 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
759 }
760
761 // Mark vector float intrinsics as expand.
762 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
763 setOperationAction(ISD::FSIN, VT, Expand);
764 setOperationAction(ISD::FCOS, VT, Expand);
765 setOperationAction(ISD::FPOW, VT, Expand);
766 setOperationAction(ISD::FLOG, VT, Expand);
767 setOperationAction(ISD::FLOG2, VT, Expand);
768 setOperationAction(ISD::FLOG10, VT, Expand);
769 setOperationAction(ISD::FEXP, VT, Expand);
770 setOperationAction(ISD::FEXP2, VT, Expand);
771
772 // But we do support custom-lowering for FCOPYSIGN.
773 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
774 }
775
776 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
777 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
778 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
779 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
780 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
781 setOperationAction(ISD::SRA, VT, Custom);
782 setOperationAction(ISD::SRL, VT, Custom);
783 setOperationAction(ISD::SHL, VT, Custom);
784 setOperationAction(ISD::AND, VT, Custom);
785 setOperationAction(ISD::OR, VT, Custom);
786 setOperationAction(ISD::SETCC, VT, Custom);
787 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
788
789 setOperationAction(ISD::SELECT, VT, Expand);
790 setOperationAction(ISD::SELECT_CC, VT, Expand);
791 setOperationAction(ISD::VSELECT, VT, Expand);
792 for (MVT InnerVT : MVT::all_valuetypes())
793 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
794
795 // CNT supports only B element sizes.
796 if (VT != MVT::v8i8 && VT != MVT::v16i8)
797 setOperationAction(ISD::CTPOP, VT, Expand);
798
799 setOperationAction(ISD::UDIV, VT, Expand);
800 setOperationAction(ISD::SDIV, VT, Expand);
801 setOperationAction(ISD::UREM, VT, Expand);
802 setOperationAction(ISD::SREM, VT, Expand);
803 setOperationAction(ISD::FREM, VT, Expand);
804
805 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
806 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
807
808 if (!VT.isFloatingPoint())
809 setOperationAction(ISD::ABS, VT, Legal);
810
811 // [SU][MIN|MAX] are available for all NEON types apart from i64.
812 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
813 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
814 setOperationAction(Opcode, VT, Legal);
815
816 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
817 if (VT.isFloatingPoint() &&
818 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
819 for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
820 ISD::FMINNUM, ISD::FMAXNUM})
821 setOperationAction(Opcode, VT, Legal);
822
823 if (Subtarget->isLittleEndian()) {
824 for (unsigned im = (unsigned)ISD::PRE_INC;
825 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
826 setIndexedLoadAction(im, VT, Legal);
827 setIndexedStoreAction(im, VT, Legal);
828 }
829 }
830}
831
832void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
833 addRegisterClass(VT, &AArch64::FPR64RegClass);
834 addTypeForNEON(VT, MVT::v2i32);
835}
836
837void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
838 addRegisterClass(VT, &AArch64::FPR128RegClass);
839 addTypeForNEON(VT, MVT::v4i32);
840}
841
842EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
843 EVT VT) const {
844 if (!VT.isVector())
845 return MVT::i32;
846 return VT.changeVectorElementTypeToInteger();
847}
848
849static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
850 const APInt &Demanded,
851 TargetLowering::TargetLoweringOpt &TLO,
852 unsigned NewOpc) {
853 uint64_t OldImm = Imm, NewImm, Enc;
854 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
855
856 // Return if the immediate is already all zeros, all ones, a bimm32 or a
857 // bimm64.
858 if (Imm == 0 || Imm == Mask ||
859 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
860 return false;
861
862 unsigned EltSize = Size;
863 uint64_t DemandedBits = Demanded.getZExtValue();
864
865 // Clear bits that are not demanded.
866 Imm &= DemandedBits;
867
868 while (true) {
869 // The goal here is to set the non-demanded bits in a way that minimizes
870 // the number of switching between 0 and 1. In order to achieve this goal,
871 // we set the non-demanded bits to the value of the preceding demanded bits.
872 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
873 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
874 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
875 // The final result is 0b11000011.
876 uint64_t NonDemandedBits = ~DemandedBits;
877 uint64_t InvertedImm = ~Imm & DemandedBits;
878 uint64_t RotatedImm =
879 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
880 NonDemandedBits;
881 uint64_t Sum = RotatedImm + NonDemandedBits;
882 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
883 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
884 NewImm = (Imm | Ones) & Mask;
885
886 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
887 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
888 // we halve the element size and continue the search.
889 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
890 break;
891
892 // We cannot shrink the element size any further if it is 2-bits.
893 if (EltSize == 2)
894 return false;
895
896 EltSize /= 2;
897 Mask >>= EltSize;
898 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
899
900 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
901 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
902 return false;
903
904 // Merge the upper and lower halves of Imm and DemandedBits.
905 Imm |= Hi;
906 DemandedBits |= DemandedBitsHi;
907 }
908
909 ++NumOptimizedImms;
910
911 // Replicate the element across the register width.
912 while (EltSize < Size) {
913 NewImm |= NewImm << EltSize;
914 EltSize *= 2;
915 }
916
917 (void)OldImm;
918 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
919 "demanded bits should never be altered");
920 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
921
922 // Create the new constant immediate node.
923 EVT VT = Op.getValueType();
924 SDLoc DL(Op);
925 SDValue New;
926
927 // If the new constant immediate is all-zeros or all-ones, let the target
928 // independent DAG combine optimize this node.
929 if (NewImm == 0 || NewImm == OrigMask) {
930 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
931 TLO.DAG.getConstant(NewImm, DL, VT));
932 // Otherwise, create a machine node so that target independent DAG combine
933 // doesn't undo this optimization.
934 } else {
935 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
936 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
937 New = SDValue(
938 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
939 }
940
941 return TLO.CombineTo(Op, New);
942}
943
944bool AArch64TargetLowering::targetShrinkDemandedConstant(
945 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
946 // Delay this optimization to as late as possible.
947 if (!TLO.LegalOps)
948 return false;
949
950 if (!EnableOptimizeLogicalImm)
951 return false;
952
953 EVT VT = Op.getValueType();
954 if (VT.isVector())
955 return false;
956
957 unsigned Size = VT.getSizeInBits();
958 assert((Size == 32 || Size == 64) &&
959 "i32 or i64 is expected after legalization.");
960
961 // Exit early if we demand all bits.
962 if (Demanded.countPopulation() == Size)
963 return false;
964
965 unsigned NewOpc;
966 switch (Op.getOpcode()) {
967 default:
968 return false;
969 case ISD::AND:
970 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
971 break;
972 case ISD::OR:
973 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
974 break;
975 case ISD::XOR:
976 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
977 break;
978 }
979 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
980 if (!C)
981 return false;
982 uint64_t Imm = C->getZExtValue();
983 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
984}
985
986/// computeKnownBitsForTargetNode - Determine which of the bits specified in
987/// Mask are known to be either zero or one and return them Known.
988void AArch64TargetLowering::computeKnownBitsForTargetNode(
989 const SDValue Op, KnownBits &Known,
990 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
991 switch (Op.getOpcode()) {
992 default:
993 break;
994 case AArch64ISD::CSEL: {
995 KnownBits Known2;
996 DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
997 DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
998 Known.Zero &= Known2.Zero;
999 Known.One &= Known2.One;
1000 break;
1001 }
1002 case ISD::INTRINSIC_W_CHAIN: {
1003 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
1004 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
1005 switch (IntID) {
1006 default: return;
1007 case Intrinsic::aarch64_ldaxr:
1008 case Intrinsic::aarch64_ldxr: {
1009 unsigned BitWidth = Known.getBitWidth();
1010 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
1011 unsigned MemBits = VT.getScalarSizeInBits();
1012 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
1013 return;
1014 }
1015 }
1016 break;
1017 }
1018 case ISD::INTRINSIC_WO_CHAIN:
1019 case ISD::INTRINSIC_VOID: {
1020 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1021 switch (IntNo) {
1022 default:
1023 break;
1024 case Intrinsic::aarch64_neon_umaxv:
1025 case Intrinsic::aarch64_neon_uminv: {
1026 // Figure out the datatype of the vector operand. The UMINV instruction
1027 // will zero extend the result, so we can mark as known zero all the
1028 // bits larger than the element datatype. 32-bit or larger doesn't need
1029 // this as those are legal types and will be handled by isel directly.
1030 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1031 unsigned BitWidth = Known.getBitWidth();
1032 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1033 assert(BitWidth >= 8 && "Unexpected width!");
1034 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1035 Known.Zero |= Mask;
1036 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1037 assert(BitWidth >= 16 && "Unexpected width!");
1038 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1039 Known.Zero |= Mask;
1040 }
1041 break;
1042 } break;
1043 }
1044 }
1045 }
1046}
1047
1048MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1049 EVT) const {
1050 return MVT::i64;
1051}
1052
1053bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1054 unsigned AddrSpace,
1055 unsigned Align,
1056 bool *Fast) const {
1057 if (Subtarget->requiresStrictAlign())
1058 return false;
1059
1060 if (Fast) {
1061 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1062 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1063 // See comments in performSTORECombine() for more details about
1064 // these conditions.
1065
1066 // Code that uses clang vector extensions can mark that it
1067 // wants unaligned accesses to be treated as fast by
1068 // underspecifying alignment to be 1 or 2.
1069 Align <= 2 ||
1070
1071 // Disregard v2i64. Memcpy lowering produces those and splitting
1072 // them regresses performance on micro-benchmarks and olden/bh.
1073 VT == MVT::v2i64;
1074 }
1075 return true;
1076}
1077
1078FastISel *
1079AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1080 const TargetLibraryInfo *libInfo) const {
1081 return AArch64::createFastISel(funcInfo, libInfo);
1082}
1083
1084const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1085 switch ((AArch64ISD::NodeType)Opcode) {
1086 case AArch64ISD::FIRST_NUMBER: break;
1087 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1088 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1089 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1090 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1091 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1092 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1093 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1094 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1095 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1096 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1097 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1098 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1099 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1100 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1101 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1102 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1103 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1104 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1105 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1106 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1107 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1108 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1109 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1110 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1111 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1112 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1113 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1114 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1115 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1116 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1117 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1118 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1119 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1120 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1121 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1122 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1123 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1124 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1125 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1126 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1127 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1128 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1129 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1130 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1131 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1132 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1133 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1134 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1135 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1136 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1137 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1138 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1139 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1140 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1141 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1142 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1143 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1144 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1145 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1146 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1147 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1148 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1149 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1150 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1151 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1152 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1153 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1154 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1155 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1156 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1157 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1158 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1159 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1160 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1161 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1162 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1163 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1164 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1165 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1166 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1167 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1168 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1169 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1170 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1171 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1172 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1173 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1174 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1175 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1176 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1177 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1178 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1179 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1180 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1181 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1182 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1183 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1184 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1185 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1186 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1187 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1188 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1189 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1190 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1191 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1192 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1193 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1194 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1195 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1196 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1197 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1198 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1199 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1200 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1201 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1202 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1203 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1204 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1205 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1206 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1207 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1208 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1209 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1210 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1211 }
1212 return nullptr;
1213}
1214
1215MachineBasicBlock *
1216AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1217 MachineBasicBlock *MBB) const {
1218 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1219 // phi node:
1220
1221 // OrigBB:
1222 // [... previous instrs leading to comparison ...]
1223 // b.ne TrueBB
1224 // b EndBB
1225 // TrueBB:
1226 // ; Fallthrough
1227 // EndBB:
1228 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1229
1230 MachineFunction *MF = MBB->getParent();
1231 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1232 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1233 DebugLoc DL = MI.getDebugLoc();
1234 MachineFunction::iterator It = ++MBB->getIterator();
1235
1236 unsigned DestReg = MI.getOperand(0).getReg();
1237 unsigned IfTrueReg = MI.getOperand(1).getReg();
1238 unsigned IfFalseReg = MI.getOperand(2).getReg();
1239 unsigned CondCode = MI.getOperand(3).getImm();
1240 bool NZCVKilled = MI.getOperand(4).isKill();
1241
1242 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1243 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1244 MF->insert(It, TrueBB);
1245 MF->insert(It, EndBB);
1246
1247 // Transfer rest of current basic-block to EndBB
1248 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1249 MBB->end());
1250 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1251
1252 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1253 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1254 MBB->addSuccessor(TrueBB);
1255 MBB->addSuccessor(EndBB);
1256
1257 // TrueBB falls through to the end.
1258 TrueBB->addSuccessor(EndBB);
1259
1260 if (!NZCVKilled) {
1261 TrueBB->addLiveIn(AArch64::NZCV);
1262 EndBB->addLiveIn(AArch64::NZCV);
1263 }
1264
1265 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1266 .addReg(IfTrueReg)
1267 .addMBB(TrueBB)
1268 .addReg(IfFalseReg)
1269 .addMBB(MBB);
1270
1271 MI.eraseFromParent();
1272 return EndBB;
1273}
1274
1275MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1276 MachineInstr &MI, MachineBasicBlock *BB) const {
1277 switch (MI.getOpcode()) {
1278 default:
1279#ifndef NDEBUG
1280 MI.dump();
1281#endif
1282 llvm_unreachable("Unexpected instruction for custom inserter!");
1283
1284 case AArch64::F128CSEL:
1285 return EmitF128CSEL(MI, BB);
1286
1287 case TargetOpcode::STACKMAP:
1288 case TargetOpcode::PATCHPOINT:
1289 return emitPatchPoint(MI, BB);
1290 }
1291}
1292
1293//===----------------------------------------------------------------------===//
1294// AArch64 Lowering private implementation.
1295//===----------------------------------------------------------------------===//
1296
1297//===----------------------------------------------------------------------===//
1298// Lowering Code
1299//===----------------------------------------------------------------------===//
1300
1301/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1302/// CC
1303static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1304 switch (CC) {
1305 default:
1306 llvm_unreachable("Unknown condition code!");
1307 case ISD::SETNE:
1308 return AArch64CC::NE;
1309 case ISD::SETEQ:
1310 return AArch64CC::EQ;
1311 case ISD::SETGT:
1312 return AArch64CC::GT;
1313 case ISD::SETGE:
1314 return AArch64CC::GE;
1315 case ISD::SETLT:
1316 return AArch64CC::LT;
1317 case ISD::SETLE:
1318 return AArch64CC::LE;
1319 case ISD::SETUGT:
1320 return AArch64CC::HI;
1321 case ISD::SETUGE:
1322 return AArch64CC::HS;
1323 case ISD::SETULT:
1324 return AArch64CC::LO;
1325 case ISD::SETULE:
1326 return AArch64CC::LS;
1327 }
1328}
1329
1330/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1331static void changeFPCCToAArch64CC(ISD::CondCode CC,
1332 AArch64CC::CondCode &CondCode,
1333 AArch64CC::CondCode &CondCode2) {
1334 CondCode2 = AArch64CC::AL;
1335 switch (CC) {
1336 default:
1337 llvm_unreachable("Unknown FP condition!");
1338 case ISD::SETEQ:
1339 case ISD::SETOEQ:
1340 CondCode = AArch64CC::EQ;
1341 break;
1342 case ISD::SETGT:
1343 case ISD::SETOGT:
1344 CondCode = AArch64CC::GT;
1345 break;
1346 case ISD::SETGE:
1347 case ISD::SETOGE:
1348 CondCode = AArch64CC::GE;
1349 break;
1350 case ISD::SETOLT:
1351 CondCode = AArch64CC::MI;
1352 break;
1353 case ISD::SETOLE:
1354 CondCode = AArch64CC::LS;
1355 break;
1356 case ISD::SETONE:
1357 CondCode = AArch64CC::MI;
1358 CondCode2 = AArch64CC::GT;
1359 break;
1360 case ISD::SETO:
1361 CondCode = AArch64CC::VC;
1362 break;
1363 case ISD::SETUO:
1364 CondCode = AArch64CC::VS;
1365 break;
1366 case ISD::SETUEQ:
1367 CondCode = AArch64CC::EQ;
1368 CondCode2 = AArch64CC::VS;
1369 break;
1370 case ISD::SETUGT:
1371 CondCode = AArch64CC::HI;
1372 break;
1373 case ISD::SETUGE:
1374 CondCode = AArch64CC::PL;
1375 break;
1376 case ISD::SETLT:
1377 case ISD::SETULT:
1378 CondCode = AArch64CC::LT;
1379 break;
1380 case ISD::SETLE:
1381 case ISD::SETULE:
1382 CondCode = AArch64CC::LE;
1383 break;
1384 case ISD::SETNE:
1385 case ISD::SETUNE:
1386 CondCode = AArch64CC::NE;
1387 break;
1388 }
1389}
1390
1391/// Convert a DAG fp condition code to an AArch64 CC.
1392/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1393/// should be AND'ed instead of OR'ed.
1394static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1395 AArch64CC::CondCode &CondCode,
1396 AArch64CC::CondCode &CondCode2) {
1397 CondCode2 = AArch64CC::AL;
1398 switch (CC) {
1399 default:
1400 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1401 assert(CondCode2 == AArch64CC::AL);
1402 break;
1403 case ISD::SETONE:
1404 // (a one b)
1405 // == ((a olt b) || (a ogt b))
1406 // == ((a ord b) && (a une b))
1407 CondCode = AArch64CC::VC;
1408 CondCode2 = AArch64CC::NE;
1409 break;
1410 case ISD::SETUEQ:
1411 // (a ueq b)
1412 // == ((a uno b) || (a oeq b))
1413 // == ((a ule b) && (a uge b))
1414 CondCode = AArch64CC::PL;
1415 CondCode2 = AArch64CC::LE;
1416 break;
1417 }
1418}
1419
1420/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1421/// CC usable with the vector instructions. Fewer operations are available
1422/// without a real NZCV register, so we have to use less efficient combinations
1423/// to get the same effect.
1424static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1425 AArch64CC::CondCode &CondCode,
1426 AArch64CC::CondCode &CondCode2,
1427 bool &Invert) {
1428 Invert = false;
1429 switch (CC) {
1430 default:
1431 // Mostly the scalar mappings work fine.
1432 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1433 break;
1434 case ISD::SETUO:
1435 Invert = true;
1436 LLVM_FALLTHROUGH;
1437 case ISD::SETO:
1438 CondCode = AArch64CC::MI;
1439 CondCode2 = AArch64CC::GE;
1440 break;
1441 case ISD::SETUEQ:
1442 case ISD::SETULT:
1443 case ISD::SETULE:
1444 case ISD::SETUGT:
1445 case ISD::SETUGE:
1446 // All of the compare-mask comparisons are ordered, but we can switch
1447 // between the two by a double inversion. E.g. ULE == !OGT.
1448 Invert = true;
1449 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1450 break;
1451 }
1452}
1453
1454static bool isLegalArithImmed(uint64_t C) {
1455 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1456 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1457 LLVM_DEBUG(dbgs() << "Is imm " << C
1458 << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1459 return IsLegal;
1460}
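// A minimal usage sketch (illustrative, not part of the original file) of how
// the predicate above classifies immediates: a value is legal if it fits in
// 12 bits, or if it is a 12-bit value shifted left by 12.
//   isLegalArithImmed(0xFFF);      // true:  fits in the low 12 bits
//   isLegalArithImmed(0xABC000);   // true:  low 12 bits clear, fits in 24 bits
//   isLegalArithImmed(0x1001);     // false: needs 13 significant bits
//   isLegalArithImmed(0x1000000);  // false: shifted value exceeds 12 bits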
1461
1462static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1463 const SDLoc &dl, SelectionDAG &DAG) {
1464 EVT VT = LHS.getValueType();
1465 const bool FullFP16 =
1466 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1467
1468 if (VT.isFloatingPoint()) {
1469 assert(VT != MVT::f128);
1470 if (VT == MVT::f16 && !FullFP16) {
1471 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1472 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1473 VT = MVT::f32;
1474 }
1475 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1476 }
1477
1478 // The CMP instruction is just an alias for SUBS, and representing it as
1479 // SUBS means that it's possible to get CSE with subtract operations.
1480 // A later phase can perform the optimization of setting the destination
1481 // register to WZR/XZR if it ends up being unused.
1482 unsigned Opcode = AArch64ISD::SUBS;
1483
1484 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
1485 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1486 // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
1487 // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1488 // can be set differently by this operation. It comes down to whether
1489 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1490 // everything is fine. If not then the optimization is wrong. Thus general
1491 // comparisons are only valid if op2 != 0.
1492
1493 // So, finally, the only LLVM-native comparisons that don't mention C and V
1494 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1495 // the absence of information about op2.
1496 Opcode = AArch64ISD::ADDS;
1497 RHS = RHS.getOperand(1);
1498 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1499 !isUnsignedIntSetCC(CC)) {
1500 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1501 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1502 // of the signed comparisons.
1503 Opcode = AArch64ISD::ANDS;
1504 RHS = LHS.getOperand(1);
1505 LHS = LHS.getOperand(0);
1506 }
1507
1508 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1509 .getValue(1);
1510}
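// A minimal worked example (not from the original file) of why the CMN
// rewrite above is restricted to SETEQ/SETNE: with op1 == 0 and op2 == 0,
//   SUBS 0, (0 - 0)   computes 0 - 0 and sets C = 1 (no borrow), while
//   ADDS 0, 0         computes 0 + 0 and sets C = 0,
// so a condition that reads C (e.g. HS/LO) would flip between the two forms,
// whereas Z (and therefore EQ/NE) is identical in both.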
1511
1512/// \defgroup AArch64CCMP CMP;CCMP matching
1513///
1514/// These functions deal with the formation of CMP;CCMP;... sequences.
1515/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1516/// a comparison. They set the NZCV flags to a predefined value if their
1517/// predicate is false. This allows expressing arbitrary conjunctions, for
1518/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B)))"
1519/// expressed as:
1520/// cmp A
1521/// ccmp B, inv(CB), CA
1522/// check for CB flags
1523///
1524/// In general we can create code for arbitrary "... (and (and A B) C)"
1525/// sequences. We can also implement some "or" expressions, because "(or A B)"
1526/// is equivalent to "not (and (not A) (not B))" and we can implement some
1527/// negation operations:
1528/// We can negate the results of a single comparison by inverting the flags
1529/// used when the predicate fails and inverting the flags tested in the next
1530/// instruction; We can also negate the results of the whole previous
1531/// conditional compare sequence by inverting the flags tested in the next
1532/// instruction. However there is no way to negate the result of a partial
1533/// sequence.
1534///
1535/// Therefore on encountering an "or" expression we can negate the subtree on
1536/// one side and have to be able to push the negate to the leaves of the subtree
1537/// on the other side (see also the comments in code). As complete example:
1538/// "or (or (setCA (cmp A)) (setCB (cmp B)))
1539/// (and (setCC (cmp C)) (setCD (cmp D)))"
1540/// is transformed to
1541/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1542/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1543/// and implemented as:
1544/// cmp C
1545/// ccmp D, inv(CD), CC
1546/// ccmp A, CA, inv(CD)
1547/// ccmp B, CB, inv(CA)
1548/// check for CB flags
1549/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1550/// by conditional compare sequences.
1551/// @{
1552
1553/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1554static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1555 ISD::CondCode CC, SDValue CCOp,
1556 AArch64CC::CondCode Predicate,
1557 AArch64CC::CondCode OutCC,
1558 const SDLoc &DL, SelectionDAG &DAG) {
1559 unsigned Opcode = 0;
1560 const bool FullFP16 =
1561 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1562
1563 if (LHS.getValueType().isFloatingPoint()) {
1564 assert(LHS.getValueType() != MVT::f128);
1565 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1566 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1567 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1568 }
1569 Opcode = AArch64ISD::FCCMP;
1570 } else if (RHS.getOpcode() == ISD::SUB) {
1571 SDValue SubOp0 = RHS.getOperand(0);
1572 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1573 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1574 Opcode = AArch64ISD::CCMN;
1575 RHS = RHS.getOperand(1);
1576 }
1577 }
1578 if (Opcode == 0)
1579 Opcode = AArch64ISD::CCMP;
1580
1581 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1582 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1583 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1584 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1585 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1586}
1587
1588/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1589/// CanNegate is set to true if we can push a negate operation through
1590/// the tree in a way that we are left with AND operations and negate operations
1591/// at the leaves only. i.e. "not (or (or x y) z)" can be changed to
1592/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1593/// brought into such a form.
1594static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
1595 unsigned Depth = 0) {
1596 if (!Val.hasOneUse())
1597 return false;
1598 unsigned Opcode = Val->getOpcode();
1599 if (Opcode == ISD::SETCC) {
1600 if (Val->getOperand(0).getValueType() == MVT::f128)
1601 return false;
1602 CanNegate = true;
1603 return true;
1604 }
1605 // Protect against exponential runtime and stack overflow.
1606 if (Depth > 6)
1607 return false;
1608 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1609 SDValue O0 = Val->getOperand(0);
1610 SDValue O1 = Val->getOperand(1);
1611 bool CanNegateL;
1612 if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1))
1613 return false;
1614 bool CanNegateR;
1615 if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1))
1616 return false;
1617
1618 if (Opcode == ISD::OR) {
1619 // For an OR expression we need to be able to negate at least one side or
1620 // we cannot do the transformation at all.
1621 if (!CanNegateL && !CanNegateR)
1622 return false;
1623 // We can however change a (not (or x y)) to (and (not x) (not y)) if we
1624 // can negate the x and y subtrees.
1625 CanNegate = CanNegateL && CanNegateR;
1626 } else {
1627 // If the operands are OR expressions then we finally need to negate their
1628 // outputs; we can only do that for the operand emitted last, by
1629 // negating OutCC, not for both operands.
1630 bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1631 bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1632 if (NeedsNegOutL && NeedsNegOutR)
1633 return false;
1634 // We cannot negate an AND operation (it would become an OR).
1635 CanNegate = false;
1636 }
1637 return true;
1638 }
1639 return false;
1640}
1641
1642/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1643/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1644/// Tries to transform the given i1 producing node @p Val to a series of compare
1645/// and conditional compare operations. @returns an NZCV flags producing node
1646/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
1647/// the transformation was not possible.
1648/// On recursive invocations @p PushNegate may be set to true to have negation
1649/// effects pushed to the tree leaves; @p Predicate is an NZCV flag predicate
1650/// for the comparisons in the current subtree; @p Depth limits the search
1651/// depth to avoid stack overflow.
1652static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val,
1653 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1654 AArch64CC::CondCode Predicate) {
1655 // We're at a tree leaf, produce a conditional comparison operation.
1656 unsigned Opcode = Val->getOpcode();
1657 if (Opcode == ISD::SETCC) {
1658 SDValue LHS = Val->getOperand(0);
1659 SDValue RHS = Val->getOperand(1);
1660 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1661 bool isInteger = LHS.getValueType().isInteger();
1662 if (Negate)
1663 CC = getSetCCInverse(CC, isInteger);
1664 SDLoc DL(Val);
1665 // Determine OutCC and handle FP special case.
1666 if (isInteger) {
1667 OutCC = changeIntCCToAArch64CC(CC);
1668 } else {
1669 assert(LHS.getValueType().isFloatingPoint());
1670 AArch64CC::CondCode ExtraCC;
1671 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1672 // Some floating point conditions can't be tested with a single condition
1673 // code. Construct an additional comparison in this case.
1674 if (ExtraCC != AArch64CC::AL) {
1675 SDValue ExtraCmp;
1676 if (!CCOp.getNode())
1677 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1678 else
1679 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1680 ExtraCC, DL, DAG);
1681 CCOp = ExtraCmp;
1682 Predicate = ExtraCC;
1683 }
1684 }
1685
1686 // Produce a normal comparison if we are first in the chain
1687 if (!CCOp)
1688 return emitComparison(LHS, RHS, CC, DL, DAG);
1689 // Otherwise produce a ccmp.
1690 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1691 DAG);
1692 }
1693 assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1694 "Valid conjunction/disjunction tree");
1695
1696 // Check if both sides can be transformed.
1697 SDValue LHS = Val->getOperand(0);
1698 SDValue RHS = Val->getOperand(1);
1699
1700 // In case of an OR we need to negate our operands and the result.
1701 // (A v B) <=> not(not(A) ^ not(B))
1702 bool NegateOpsAndResult = Opcode == ISD::OR;
1703 // We can negate the results of all previous operations by inverting the
1704 // predicate flags giving us a free negation for one side. The other side
1705 // must be negatable by itself.
1706 if (NegateOpsAndResult) {
1707 // See which side we can negate.
1708 bool CanNegateL;
1709 bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
1710 assert(isValidL && "Valid conjunction/disjunction tree");
1711 (void)isValidL;
1712
1713#ifndef NDEBUG
1714 bool CanNegateR;
1715 bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
1716 assert(isValidR && "Valid conjunction/disjunction tree");
1717 assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1718#endif
1719
1720 // Order the side which we cannot negate to RHS so we can emit it first.
1721 if (!CanNegateL)
1722 std::swap(LHS, RHS);
1723 } else {
1724 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1725 assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1726 "Valid conjunction/disjunction tree");
1727 // Order the side where we need to negate the output flags to RHS so it
1728 // gets emitted first.
1729 if (NeedsNegOutL)
1730 std::swap(LHS, RHS);
1731 }
1732
1733 // Emit RHS. If we want to negate the tree we only need to push a negate
1734 // through if we are already in a PushNegate case, otherwise we can negate
1735 // the "flags to test" afterwards.
1736 AArch64CC::CondCode RHSCC;
1737 SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
1738 CCOp, Predicate);
1739 if (NegateOpsAndResult && !Negate)
1740 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1741 // Emit LHS. We may need to negate it.
1742 SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
1743 NegateOpsAndResult, CmpR,
1744 RHSCC);
1745 // If we transformed an OR to an AND then we have to negate the result
1746 // (or absorb the Negate parameter).
1747 if (NegateOpsAndResult && !Negate)
1748 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1749 return CmpL;
1750}
1751
1752/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1753/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1754/// \see emitConjunctionDisjunctionTreeRec().
1755static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
1756 AArch64CC::CondCode &OutCC) {
1757 bool CanNegate;
1758 if (!isConjunctionDisjunctionTree(Val, CanNegate))
1759 return SDValue();
1760
1761 return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(),
1762 AArch64CC::AL);
1763}
1764
1765/// @}
1766
1767static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1768 SDValue &AArch64cc, SelectionDAG &DAG,
1769 const SDLoc &dl) {
1770 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1771 EVT VT = RHS.getValueType();
1772 uint64_t C = RHSC->getZExtValue();
1773 if (!isLegalArithImmed(C)) {
1774 // Constant does not fit, try adjusting it by one?
1775 switch (CC) {
1776 default:
1777 break;
1778 case ISD::SETLT:
1779 case ISD::SETGE:
1780 if ((VT == MVT::i32 && C != 0x80000000 &&
1781 isLegalArithImmed((uint32_t)(C - 1))) ||
1782 (VT == MVT::i64 && C != 0x80000000ULL &&
1783 isLegalArithImmed(C - 1ULL))) {
1784 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1785 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1786 RHS = DAG.getConstant(C, dl, VT);
1787 }
1788 break;
1789 case ISD::SETULT:
1790 case ISD::SETUGE:
1791 if ((VT == MVT::i32 && C != 0 &&
1792 isLegalArithImmed((uint32_t)(C - 1))) ||
1793 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1794 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1795 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1796 RHS = DAG.getConstant(C, dl, VT);
1797 }
1798 break;
1799 case ISD::SETLE:
1800 case ISD::SETGT:
1801 if ((VT == MVT::i32 && C != INT32_MAX &&
1802 isLegalArithImmed((uint32_t)(C + 1))) ||
1803 (VT == MVT::i64 && C != INT64_MAX &&
1804 isLegalArithImmed(C + 1ULL))) {
1805 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1806 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1807 RHS = DAG.getConstant(C, dl, VT);
1808 }
1809 break;
1810 case ISD::SETULE:
1811 case ISD::SETUGT:
1812 if ((VT == MVT::i32 && C != UINT32_MAX &&
1813 isLegalArithImmed((uint32_t)(C + 1))) ||
1814 (VT == MVT::i64 && C != UINT64_MAX &&
1815 isLegalArithImmed(C + 1ULL))) {
1816 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1817 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1818 RHS = DAG.getConstant(C, dl, VT);
1819 }
1820 break;
1821 }
1822 }
1823 }
1824 SDValue Cmp;
1825 AArch64CC::CondCode AArch64CC;
1826 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1827 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1828
1829 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1830 // For the i8 operand, the largest immediate is 255, so this can be easily
1831 // encoded in the compare instruction. For the i16 operand, however, the
1832 // largest immediate cannot be encoded in the compare.
1833 // Therefore, use a sign extending load and cmn to avoid materializing the
1834 // -1 constant. For example,
1835 // movz w1, #65535
1836 // ldrh w0, [x0, #0]
1837 // cmp w0, w1
1838 // >
1839 // ldrsh w0, [x0, #0]
1840 // cmn w0, #1
1841 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1842 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1843 // ensure both the LHS and RHS are truly zero extended and to make sure the
1844 // transformation is profitable.
1845 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1846 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1847 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1848 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1849 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1850 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1851 SDValue SExt =
1852 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1853 DAG.getValueType(MVT::i16));
1854 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1855 RHS.getValueType()),
1856 CC, dl, DAG);
1857 AArch64CC = changeIntCCToAArch64CC(CC);
1858 }
1859 }
1860
1861 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1862 if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1863 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1864 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1865 }
1866 }
1867 }
1868
1869 if (!Cmp) {
1870 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1871 AArch64CC = changeIntCCToAArch64CC(CC);
1872 }
1873 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1874 return Cmp;
1875}
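// A minimal worked example (not from the original file) of the constant
// adjustment performed above: 0x1001 is not a legal arithmetic immediate,
// but 0x1000 is (a 12-bit value shifted left by 12), so
//   (setcc x, 0x1001, setlt)  becomes  (setcc x, 0x1000, setle)
// and the comparison can then be emitted as a single SUBS with an immediate.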
1876
1877static std::pair<SDValue, SDValue>
1878getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1879 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1880 "Unsupported value type");
1881 SDValue Value, Overflow;
1882 SDLoc DL(Op);
1883 SDValue LHS = Op.getOperand(0);
1884 SDValue RHS = Op.getOperand(1);
1885 unsigned Opc = 0;
1886 switch (Op.getOpcode()) {
1887 default:
1888 llvm_unreachable("Unknown overflow instruction!");
1889 case ISD::SADDO:
1890 Opc = AArch64ISD::ADDS;
1891 CC = AArch64CC::VS;
1892 break;
1893 case ISD::UADDO:
1894 Opc = AArch64ISD::ADDS;
1895 CC = AArch64CC::HS;
1896 break;
1897 case ISD::SSUBO:
1898 Opc = AArch64ISD::SUBS;
1899 CC = AArch64CC::VS;
1900 break;
1901 case ISD::USUBO:
1902 Opc = AArch64ISD::SUBS;
1903 CC = AArch64CC::LO;
1904 break;
1905 // Multiply needs a little bit of extra work.
1906 case ISD::SMULO:
1907 case ISD::UMULO: {
1908 CC = AArch64CC::NE;
1909 bool IsSigned = Op.getOpcode() == ISD::SMULO;
1910 if (Op.getValueType() == MVT::i32) {
1911 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1912 // For a 32 bit multiply with overflow check we want the instruction
1913 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1914 // need to generate the following pattern:
1915 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b)))
1916 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1917 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1918 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1919 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1920 DAG.getConstant(0, DL, MVT::i64));
1921 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1922 // operation. We need to clear out the upper 32 bits, because we used a
1923 // widening multiply that wrote all 64 bits. In the end this should be a
1924 // noop.
1925 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1926 if (IsSigned) {
1927 // The signed overflow check requires more than just a simple check for
1928 // any bit set in the upper 32 bits of the result. These bits could be
1929 // just the sign bits of a negative number. To perform the overflow
1930 // check we have to arithmetically shift the lower 32 bits of the result
1931 // right by 31 bits. Then we compare that to the upper 32 bits.
1932 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1933 DAG.getConstant(32, DL, MVT::i64));
1934 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1935 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1936 DAG.getConstant(31, DL, MVT::i64));
1937 // It is important that LowerBits is last, otherwise the arithmetic
1938 // shift will not be folded into the compare (SUBS).
1939 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1940 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1941 .getValue(1);
1942 } else {
1943 // The overflow check for unsigned multiply is easy. We only need to
1944 // check if any of the upper 32 bits are set. This can be done with a
1945 // CMP (shifted register). For that we need to generate the following
1946 // pattern:
1947 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32))
1948 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1949 DAG.getConstant(32, DL, MVT::i64));
1950 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1951 Overflow =
1952 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1953 DAG.getConstant(0, DL, MVT::i64),
1954 UpperBits).getValue(1);
1955 }
1956 break;
1957 }
1958 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1959 // For the 64 bit multiply
1960 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1961 if (IsSigned) {
1962 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1963 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1964 DAG.getConstant(63, DL, MVT::i64));
1965 // It is important that LowerBits is last, otherwise the arithmetic
1966 // shift will not be folded into the compare (SUBS).
1967 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1968 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1969 .getValue(1);
1970 } else {
1971 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1972 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1973 Overflow =
1974 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1975 DAG.getConstant(0, DL, MVT::i64),
1976 UpperBits).getValue(1);
1977 }
1978 break;
1979 }
1980 } // switch (...)
1981
1982 if (Opc) {
1983 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1984
1985 // Emit the AArch64 operation with overflow check.
1986 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1987 Overflow = Value.getValue(1);
1988 }
1989 return std::make_pair(Value, Overflow);
1990}
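// A minimal illustrative sketch (not from the original file) of the mapping
// established above for the scalar add/sub cases:
//   llvm.sadd.with.overflow  ->  ADDS ...  ; overflow iff VS (V == 1)
//   llvm.uadd.with.overflow  ->  ADDS ...  ; overflow iff HS (C == 1)
//   llvm.ssub.with.overflow  ->  SUBS ...  ; overflow iff VS (V == 1)
//   llvm.usub.with.overflow  ->  SUBS ...  ; overflow iff LO (C == 0)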
1991
1992SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1993 RTLIB::Libcall Call) const {
1994 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1995 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
1996}
1997
1998// Returns true if the given Op is the overflow flag result of an overflow
1999// intrinsic operation.
2000static bool isOverflowIntrOpRes(SDValue Op) {
2001 unsigned Opc = Op.getOpcode();
2002 return (Op.getResNo() == 1 &&
2003 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
2004 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
2005}
2006
2007static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
2008 SDValue Sel = Op.getOperand(0);
2009 SDValue Other = Op.getOperand(1);
2010 SDLoc dl(Sel);
2011
2012 // If the operand is an overflow checking operation, invert the condition
2013 // code and kill the Not operation. I.e., transform:
2014 // (xor (overflow_op_bool, 1))
2015 // -->
2016 // (csel 1, 0, invert(cc), overflow_op_bool)
2017 // ... which later gets transformed to just a cset instruction with an
2018 // inverted condition code, rather than a cset + eor sequence.
2019 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
2020 // Only lower legal XALUO ops.
2021 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2022 return SDValue();
2023
2024 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2025 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2026 AArch64CC::CondCode CC;
2027 SDValue Value, Overflow;
2028 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2029 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2030 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2031 CCVal, Overflow);
2032 }
2033 // If neither operand is a SELECT_CC, give up.
2034 if (Sel.getOpcode() != ISD::SELECT_CC)
2035 std::swap(Sel, Other);
2036 if (Sel.getOpcode() != ISD::SELECT_CC)
2037 return Op;
2038
2039 // The folding we want to perform is:
2040 // (xor x, (select_cc a, b, cc, 0, -1) )
2041 // -->
2042 // (csel x, (xor x, -1), cc ...)
2043 //
2044 // The latter will get matched to a CSINV instruction.
2045
2046 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2047 SDValue LHS = Sel.getOperand(0);
2048 SDValue RHS = Sel.getOperand(1);
2049 SDValue TVal = Sel.getOperand(2);
2050 SDValue FVal = Sel.getOperand(3);
2051
2052 // FIXME: This could be generalized to non-integer comparisons.
2053 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2054 return Op;
2055
2056 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2057 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2058
2059 // The values aren't constants, this isn't the pattern we're looking for.
2060 if (!CFVal || !CTVal)
2061 return Op;
2062
2063 // We can commute the SELECT_CC by inverting the condition. This
2064 // might be needed to make this fit into a CSINV pattern.
2065 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2066 std::swap(TVal, FVal);
2067 std::swap(CTVal, CFVal);
2068 CC = ISD::getSetCCInverse(CC, true);
2069 }
2070
2071 // If the constants line up, perform the transform!
2072 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2073 SDValue CCVal;
2074 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2075
2076 FVal = Other;
2077 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2078 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2079
2080 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2081 CCVal, Cmp);
2082 }
2083
2084 return Op;
2085}
2086
2087static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2088 EVT VT = Op.getValueType();
2089
2090 // Let legalize expand this if it isn't a legal type yet.
2091 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2092 return SDValue();
2093
2094 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2095
2096 unsigned Opc;
2097 bool ExtraOp = false;
2098 switch (Op.getOpcode()) {
2099 default:
2100 llvm_unreachable("Invalid code");
2101 case ISD::ADDC:
2102 Opc = AArch64ISD::ADDS;
2103 break;
2104 case ISD::SUBC:
2105 Opc = AArch64ISD::SUBS;
2106 break;
2107 case ISD::ADDE:
2108 Opc = AArch64ISD::ADCS;
2109 ExtraOp = true;
2110 break;
2111 case ISD::SUBE:
2112 Opc = AArch64ISD::SBCS;
2113 ExtraOp = true;
2114 break;
2115 }
2116
2117 if (!ExtraOp)
2118 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2119 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2120 Op.getOperand(2));
2121}
2122
2123static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2124 // Let legalize expand this if it isn't a legal type yet.
2125 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2126 return SDValue();
2127
2128 SDLoc dl(Op);
2129 AArch64CC::CondCode CC;
2130 // The actual operation that sets the overflow or carry flag.
2131 SDValue Value, Overflow;
2132 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2133
2134 // We use 0 and 1 as false and true values.
2135 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2136 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2137
2138 // We use an inverted condition, because the conditional select is inverted
2139 // too. This will allow it to be selected to a single instruction:
2140 // CSINC Wd, WZR, WZR, invert(cond).
2141 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2142 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2143 CCVal, Overflow);
2144
2145 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2146 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2147}
2148
2149// Prefetch operands are:
2150// 1: Address to prefetch
2151// 2: bool isWrite
2152// 3: int locality (0 = no locality ... 3 = extreme locality)
2153// 4: bool isDataCache
2154static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2155 SDLoc DL(Op);
2156 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2157 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2158 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2159
2160 bool IsStream = !Locality;
2161 // When the locality number is set
2162 if (Locality) {
2163 // The front-end should have filtered out the out-of-range values
2164 assert(Locality <= 3 && "Prefetch locality out-of-range");
2165 // The locality degree is the opposite of the cache speed.
2166 // Put the number the other way around.
2167 // The encoding starts at 0 for level 1
2168 Locality = 3 - Locality;
2169 }
2170
2171 // Build the mask value encoding the expected behavior.
2172 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2173 (!IsData << 3) | // IsDataCache bit
2174 (Locality << 1) | // Cache level bits
2175 (unsigned)IsStream; // Stream bit
2176 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2177 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2178}
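// A minimal illustrative sketch (not from the original file) of the PrfOp
// encoding built above:
//   read of data, locality 3:
//     IsWrite=0, IsData=1, Locality=3 -> 3-Locality=0, IsStream=0
//     PrfOp = (0<<4) | (0<<3) | (0<<1) | 0 = 0      // PRFM PLDL1KEEP
//   write of data, locality 1:
//     IsWrite=1, IsData=1, Locality=1 -> 3-Locality=2, IsStream=0
//     PrfOp = (1<<4) | (0<<3) | (2<<1) | 0 = 20     // PRFM PSTL3KEEP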
2179
2180SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2181 SelectionDAG &DAG) const {
2182 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2183
2184 RTLIB::Libcall LC;
2185 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2186
2187 return LowerF128Call(Op, DAG, LC);
2188}
2189
2190SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2191 SelectionDAG &DAG) const {
2192 if (Op.getOperand(0).getValueType() != MVT::f128) {
2193 // It's legal except when f128 is involved
2194 return Op;
2195 }
2196
2197 RTLIB::Libcall LC;
2198 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2199
2200 // FP_ROUND node has a second operand indicating whether it is known to be
2201 // precise. That doesn't take part in the LibCall so we can't directly use
2202 // LowerF128Call.
2203 SDValue SrcVal = Op.getOperand(0);
2204 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2205 SDLoc(Op)).first;
2206}
2207
2208static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2209 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2210 // Any additional optimization in this function should be recorded
2211 // in the cost tables.
2212 EVT InVT = Op.getOperand(0).getValueType();
2213 EVT VT = Op.getValueType();
2214 unsigned NumElts = InVT.getVectorNumElements();
2215
2216 // f16 vectors are promoted to f32 before a conversion.
2217 if (InVT.getVectorElementType() == MVT::f16) {
2218 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2219 SDLoc dl(Op);
2220 return DAG.getNode(
2221 Op.getOpcode(), dl, Op.getValueType(),
2222 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2223 }
2224
2225 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2226 SDLoc dl(Op);
2227 SDValue Cv =
2228 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2229 Op.getOperand(0));
2230 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2231 }
2232
2233 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2234 SDLoc dl(Op);
2235 MVT ExtVT =
2236 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2237 VT.getVectorNumElements());
2238 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2239 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2240 }
2241
2242 // Type changing conversions are illegal.
2243 return Op;
2244}
2245
2246SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2247 SelectionDAG &DAG) const {
2248 if (Op.getOperand(0).getValueType().isVector())
2249 return LowerVectorFP_TO_INT(Op, DAG);
2250
2251 // f16 conversions are promoted to f32 when full fp16 is not supported.
2252 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2253 !Subtarget->hasFullFP16()) {
2254 SDLoc dl(Op);
2255 return DAG.getNode(
2256 Op.getOpcode(), dl, Op.getValueType(),
2257 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2258 }
2259
2260 if (Op.getOperand(0).getValueType() != MVT::f128) {
2261 // It's legal except when f128 is involved
2262 return Op;
2263 }
2264
2265 RTLIB::Libcall LC;
2266 if (Op.getOpcode() == ISD::FP_TO_SINT)
2267 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2268 else
2269 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2270
2271 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2272 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2273}
2274
2275static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2276 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2277 // Any additional optimization in this function should be recorded
2278 // in the cost tables.
2279 EVT VT = Op.getValueType();
2280 SDLoc dl(Op);
2281 SDValue In = Op.getOperand(0);
2282 EVT InVT = In.getValueType();
2283
2284 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2285 MVT CastVT =
2286 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2287 InVT.getVectorNumElements());
2288 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2289 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2290 }
2291
2292 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2293 unsigned CastOpc =
2294 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2295 EVT CastVT = VT.changeVectorElementTypeToInteger();
2296 In = DAG.getNode(CastOpc, dl, CastVT, In);
2297 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2298 }
2299
2300 return Op;
2301}
2302
2303SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2304 SelectionDAG &DAG) const {
2305 if (Op.getValueType().isVector())
2306 return LowerVectorINT_TO_FP(Op, DAG);
2307
2308 // f16 conversions are promoted to f32 when full fp16 is not supported.
2309 if (Op.getValueType() == MVT::f16 &&
2310 !Subtarget->hasFullFP16()) {
2311 SDLoc dl(Op);
2312 return DAG.getNode(
2313 ISD::FP_ROUND, dl, MVT::f16,
2314 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2315 DAG.getIntPtrConstant(0, dl));
2316 }
2317
2318 // i128 conversions are libcalls.
2319 if (Op.getOperand(0).getValueType() == MVT::i128)
2320 return SDValue();
2321
2322 // Other conversions are legal, unless it's to the completely software-based
2323 // fp128.
2324 if (Op.getValueType() != MVT::f128)
2325 return Op;
2326
2327 RTLIB::Libcall LC;
2328 if (Op.getOpcode() == ISD::SINT_TO_FP)
2329 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2330 else
2331 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2332
2333 return LowerF128Call(Op, DAG, LC);
2334}
2335
2336SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2337 SelectionDAG &DAG) const {
2338 // For iOS, we want to call an alternative entry point: __sincos_stret,
2339 // which returns the values in two S / D registers.
2340 SDLoc dl(Op);
2341 SDValue Arg = Op.getOperand(0);
2342 EVT ArgVT = Arg.getValueType();
2343 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2344
2345 ArgListTy Args;
2346 ArgListEntry Entry;
2347
2348 Entry.Node = Arg;
2349 Entry.Ty = ArgTy;
2350 Entry.IsSExt = false;
2351 Entry.IsZExt = false;
2352 Args.push_back(Entry);
2353
2354 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2355 : RTLIB::SINCOS_STRET_F32;
2356 const char *LibcallName = getLibcallName(LC);
2357 SDValue Callee =
2358 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2359
2360 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2361 TargetLowering::CallLoweringInfo CLI(DAG);
2362 CLI.setDebugLoc(dl)
2363 .setChain(DAG.getEntryNode())
2364 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2365
2366 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2367 return CallResult.first;
2368}
2369
2370static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2371 if (Op.getValueType() != MVT::f16)
2372 return SDValue();
2373
2374 assert(Op.getOperand(0).getValueType() == MVT::i16);
2375 SDLoc DL(Op);
2376
2377 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2378 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2379 return SDValue(
2380 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2381 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2382 0);
2383}
2384
2385static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2386 if (OrigVT.getSizeInBits() >= 64)
2387 return OrigVT;
2388
2389 assert(OrigVT.isSimple() && "Expecting a simple value type");
2390
2391 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2392 switch (OrigSimpleTy) {
2393 default: llvm_unreachable("Unexpected Vector Type");
2394 case MVT::v2i8:
2395 case MVT::v2i16:
2396 return MVT::v2i32;
2397 case MVT::v4i8:
2398 return MVT::v4i16;
2399 }
2400}
2401
2402static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2403 const EVT &OrigTy,
2404 const EVT &ExtTy,
2405 unsigned ExtOpcode) {
2406 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2407 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2408 // 64-bits we need to insert a new extension so that it will be 64-bits.
2409 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2410 if (OrigTy.getSizeInBits() >= 64)
2411 return N;
2412
2413 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2414 EVT NewVT = getExtensionTo64Bits(OrigTy);
2415
2416 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2417}
2418
2419static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2420 bool isSigned) {
2421 EVT VT = N->getValueType(0);
2422
2423 if (N->getOpcode() != ISD::BUILD_VECTOR)
2424 return false;
2425
2426 for (const SDValue &Elt : N->op_values()) {
2427 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2428 unsigned EltSize = VT.getScalarSizeInBits();
2429 unsigned HalfSize = EltSize / 2;
2430 if (isSigned) {
2431 if (!isIntN(HalfSize, C->getSExtValue()))
2432 return false;
2433 } else {
2434 if (!isUIntN(HalfSize, C->getZExtValue()))
2435 return false;
2436 }
2437 continue;
2438 }
2439 return false;
2440 }
2441
2442 return true;
2443}
2444
2445static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2446 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2447 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2448 N->getOperand(0)->getValueType(0),
2449 N->getValueType(0),
2450 N->getOpcode());
2451
2452 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2453 EVT VT = N->getValueType(0);
2454 SDLoc dl(N);
2455 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2456 unsigned NumElts = VT.getVectorNumElements();
2457 MVT TruncVT = MVT::getIntegerVT(EltSize);
2458 SmallVector<SDValue, 8> Ops;
2459 for (unsigned i = 0; i != NumElts; ++i) {
2460 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2461 const APInt &CInt = C->getAPIntValue();
2462 // Element types smaller than 32 bits are not legal, so use i32 elements.
2463 // The values are implicitly truncated so sext vs. zext doesn't matter.
2464 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2465 }
2466 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2467}
2468
2469static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2470 return N->getOpcode() == ISD::SIGN_EXTEND ||
2471 isExtendedBUILD_VECTOR(N, DAG, true);
2472}
2473
2474static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2475 return N->getOpcode() == ISD::ZERO_EXTEND ||
2476 isExtendedBUILD_VECTOR(N, DAG, false);
2477}
2478
2479static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2480 unsigned Opcode = N->getOpcode();
2481 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2482 SDNode *N0 = N->getOperand(0).getNode();
2483 SDNode *N1 = N->getOperand(1).getNode();
2484 return N0->hasOneUse() && N1->hasOneUse() &&
2485 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2486 }
2487 return false;
2488}
2489
2490static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2491 unsigned Opcode = N->getOpcode();
2492 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2493 SDNode *N0 = N->getOperand(0).getNode();
2494 SDNode *N1 = N->getOperand(1).getNode();
2495 return N0->hasOneUse() && N1->hasOneUse() &&
2496 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2497 }
2498 return false;
2499}
2500
2501SDValue AArch64TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
2502 SelectionDAG &DAG) const {
2503 // The rounding mode is in bits 23:22 of the FPCR.
2504 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
2505 // The formula we use to implement this is (((FPCR + (1 << 22)) >> 22) & 3)
2506 // so that the shift + and get folded into a bitfield extract.
2507 SDLoc dl(Op);
2508
2509 SDValue FPCR_64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i64,
2510 DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl,
2511 MVT::i64));
2512 SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
2513 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
2514 DAG.getConstant(1U << 22, dl, MVT::i32));
2515 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
2516 DAG.getConstant(22, dl, MVT::i32));
2517 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
2518 DAG.getConstant(3, dl, MVT::i32));
2519}
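// Illustrative sketch, not from the LLVM source: the same arithmetic on a
// plain 32-bit value, assuming `fpcr` already holds the low word of the FPCR.
// Adding 1 << 22 before the shift implements the 0->1, 1->2, 2->3, 3->0
// rotation of the RMode field with a single add plus bitfield extract.
#include <cstdint>

static unsigned fltRoundsFromFPCR(uint32_t fpcr) {
  return ((fpcr + (1u << 22)) >> 22) & 3; // RMode lives in FPCR bits 23:22
}

// fltRoundsFromFPCR(0x00000000) == 1  (round-to-nearest  -> FLT_ROUNDS 1)
// fltRoundsFromFPCR(0x00C00000) == 0  (round-toward-zero -> FLT_ROUNDS 0)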
2520
2521static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2522 // Multiplications are only custom-lowered for 128-bit vectors so that
2523 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2524 EVT VT = Op.getValueType();
2525 assert(VT.is128BitVector() && VT.isInteger() &&
2526 "unexpected type for custom-lowering ISD::MUL");
2527 SDNode *N0 = Op.getOperand(0).getNode();
2528 SDNode *N1 = Op.getOperand(1).getNode();
2529 unsigned NewOpc = 0;
2530 bool isMLA = false;
2531 bool isN0SExt = isSignExtended(N0, DAG);
2532 bool isN1SExt = isSignExtended(N1, DAG);
2533 if (isN0SExt && isN1SExt)
2534 NewOpc = AArch64ISD::SMULL;
2535 else {
2536 bool isN0ZExt = isZeroExtended(N0, DAG);
2537 bool isN1ZExt = isZeroExtended(N1, DAG);
2538 if (isN0ZExt && isN1ZExt)
2539 NewOpc = AArch64ISD::UMULL;
2540 else if (isN1SExt || isN1ZExt) {
2541 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2542 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2543 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2544 NewOpc = AArch64ISD::SMULL;
2545 isMLA = true;
2546 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2547 NewOpc = AArch64ISD::UMULL;
2548 isMLA = true;
2549 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2550 std::swap(N0, N1);
2551 NewOpc = AArch64ISD::UMULL;
2552 isMLA = true;
2553 }
2554 }
2555
2556 if (!NewOpc) {
2557 if (VT == MVT::v2i64)
2558 // Fall through to expand this. It is not legal.
2559 return SDValue();
2560 else
2561 // Other vector multiplications are legal.
2562 return Op;
2563 }
2564 }
2565
2566 // Legalize to a S/UMULL instruction
2567 SDLoc DL(Op);
2568 SDValue Op0;
2569 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2570 if (!isMLA) {
2571 Op0 = skipExtensionForVectorMULL(N0, DAG);
2572 assert(Op0.getValueType().is64BitVector() &&
2573 Op1.getValueType().is64BitVector() &&
2574 "unexpected types for extended operands to VMULL");
2575 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2576 }
2577 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2578 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
2579 // This is true for CPUs with accumulate forwarding such as Cortex-A53/A57.
2580 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2581 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2582 EVT Op1VT = Op1.getValueType();
2583 return DAG.getNode(N0->getOpcode(), DL, VT,
2584 DAG.getNode(NewOpc, DL, VT,
2585 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2586 DAG.getNode(NewOpc, DL, VT,
2587 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2588}
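// Illustrative sketch, not from the LLVM source: why the MLA-style rewrite in
// LowerMUL is sound, shown for one unsigned scalar lane (the signed case is
// analogous). Distributivity modulo 2^64 gives
// (zext A + zext B) * zext C == zext(A)*zext(C) + zext(B)*zext(C), and each
// product on the right is exactly what one UMULL lane computes.
#include <cassert>
#include <cstdint>

static void checkOneLane(uint32_t A, uint32_t B, uint32_t C) {
  uint64_t MulAfterAdd = ((uint64_t)A + (uint64_t)B) * (uint64_t)C; // original MUL
  uint64_t TwoUMULLs   = (uint64_t)A * C + (uint64_t)B * C;         // UMULL + UMULL + ADD
  assert(MulAfterAdd == TwoUMULLs);
}

// checkOneLane(0xFFFFFFFFu, 0xFFFFFFFFu, 0xFFFFFFFFu); // equal even with wraparound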
2589
2590// Lower vector multiply high (ISD::MULHS and ISD::MULHU).
2591static SDValue LowerMULH(SDValue Op, SelectionDAG &DAG) {
2592 // Multiplications are only custom-lowered for 128-bit vectors so that
2593 // {S,U}MULL{2} can be detected. Otherwise v2i64 multiplications are not
2594 // legal.
2595 EVT VT = Op.getValueType();
2596 assert(VT.is128BitVector() && VT.isInteger() &&
2597 "unexpected type for custom-lowering ISD::MULH{U,S}");
2598
2599 SDValue V0 = Op.getOperand(0);
2600 SDValue V1 = Op.getOperand(1);
2601
2602 SDLoc DL(Op);
2603
2604 EVT ExtractVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
2605
2606 // We turn (V0 mulhs/mulhu V1) to:
2607 //
2608 // (uzp2 (smull (extract_subvector (ExtractVT V128:V0, (i64 0)),
2609 // (extract_subvector (ExtractVT V128:V1, (i64 0))))),
2610 // (smull (extract_subvector (ExtractVT V128:V0, (i64 VMull2Idx)),
2611 // (extract_subvector (ExtractVT V128:V1, (i64 VMull2Idx))))))
2612 //
2613 // Where ExtractVT is a subvector with half the number of elements, and
2614 // VMull2Idx is the index of the middle element (the start of the high half).
2615 //
2616 // The vector high part extract and multiply will be matched against
2617 // {S,U}MULL{v16i8_v8i16,v8i16_v4i32,v4i32_v2i64} which in turn will
2618 // issue a {s}mull2 instruction.
2619 //
2620 // This basically multiplies the lower subvector with '{s,u}mull', the high
2621 // subvector with '{s,u}mull2', and shuffles the high parts of both results
2622 // into the resulting vector.
2623 unsigned Mull2VectorIdx = VT.getVectorNumElements () / 2;
2624 SDValue VMullIdx = DAG.getConstant(0, DL, MVT::i64);
2625 SDValue VMull2Idx = DAG.getConstant(Mull2VectorIdx, DL, MVT::i64);
2626
2627 SDValue VMullV0 =
2628 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMullIdx);
2629 SDValue VMullV1 =
2630 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMullIdx);
2631
2632 SDValue VMull2V0 =
2633 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V0, VMull2Idx);
2634 SDValue VMull2V1 =
2635 DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, V1, VMull2Idx);
2636
2637 unsigned MullOpc = Op.getOpcode() == ISD::MULHS ? AArch64ISD::SMULL
2638 : AArch64ISD::UMULL;
2639
2640 EVT MullVT = ExtractVT.widenIntegerVectorElementType(*DAG.getContext());
2641 SDValue Mull = DAG.getNode(MullOpc, DL, MullVT, VMullV0, VMullV1);
2642 SDValue Mull2 = DAG.getNode(MullOpc, DL, MullVT, VMull2V0, VMull2V1);
2643
2644 Mull = DAG.getNode(ISD::BITCAST, DL, VT, Mull);
2645 Mull2 = DAG.getNode(ISD::BITCAST, DL, VT, Mull2);
2646
2647 return DAG.getNode(AArch64ISD::UZP2, DL, VT, Mull, Mull2);
2648}
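// Illustrative sketch, not from the LLVM source: the pattern LowerMULH builds,
// written with ACLE NEON intrinsics for one concrete case (signed v8i16,
// little-endian lane numbering assumed). smull/smull2 produce the full 32-bit
// products of the low and high halves, and uzp2 keeps only the upper 16-bit
// half of each product, i.e. mulhs per lane.
#include <arm_neon.h>

static int16x8_t mulhs_v8i16(int16x8_t V0, int16x8_t V1) {
  int32x4_t Lo = vmull_s16(vget_low_s16(V0), vget_low_s16(V1)); // smull
  int32x4_t Hi = vmull_high_s16(V0, V1);                        // smull2
  return vuzp2q_s16(vreinterpretq_s16_s32(Lo),
                    vreinterpretq_s16_s32(Hi));                 // uzp2
}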
2649
2650SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2651 SelectionDAG &DAG) const {
2652 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2653 SDLoc dl(Op);
2654 switch (IntNo) {
2655 default: return SDValue(); // Don't custom lower most intrinsics.
2656 case Intrinsic::thread_pointer: {
2657 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2658 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2659 }
2660 case Intrinsic::aarch64_neon_abs:
2661 return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
2662 Op.getOperand(1));
2663 case Intrinsic::aarch64_neon_smax:
2664 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2665 Op.getOperand(1), Op.getOperand(2));
2666 case Intrinsic::aarch64_neon_umax:
2667 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2668 Op.getOperand(1), Op.getOperand(2));
2669 case Intrinsic::aarch64_neon_smin:
2670 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2671 Op.getOperand(1), Op.getOperand(2));
2672 case Intrinsic::aarch64_neon_umin:
2673 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2674 Op.getOperand(1), Op.getOperand(2));
2675 }
2676}
2677
2678// Custom lower trunc store for v4i8 vectors, since it is promoted to v4i16.
2679static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
2680 EVT VT, EVT MemVT,
2681 SelectionDAG &DAG) {
2682 assert(VT.isVector() && "VT should be a vector type");
2683 assert(MemVT == MVT::v4i8 && VT == MVT::v4i16);
2684
2685 SDValue Value = ST->getValue();
2686
2687 // It first extends the promoted v4i16 to v8i16, truncates to v8i8, and
2688 // extracts the word lane which represents the v4i8 subvector. It optimizes
2689 // the store to:
2690 //
2691 // xtn v0.8b, v0.8h
2692 // str s0, [x0]
2693
2694 SDValue Undef = DAG.getUNDEF(MVT::i16);
2695 SDValue UndefVec = DAG.getBuildVector(MVT::v4i16, DL,
2696 {Undef, Undef, Undef, Undef});
2697
2698 SDValue TruncExt = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16,
2699 Value, UndefVec);
2700 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, TruncExt);
2701
2702 Trunc = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Trunc);
2703 SDValue ExtractTrunc = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
2704 Trunc, DAG.getConstant(0, DL, MVT::i64));
2705
2706 return DAG.getStore(ST->getChain(), DL, ExtractTrunc,
2707 ST->getBasePtr(), ST->getMemOperand());
2708}
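// Illustrative sketch, not from the LLVM source: the "xtn + str s0" sequence
// above for an unsigned v4i8 truncating store, written with ACLE NEON
// intrinsics. `dst` is a hypothetical destination pointer; the DAG uses UNDEF
// for the concatenated upper half, a zero vector stands in for it here.
#include <arm_neon.h>
#include <cstdint>
#include <cstring>

static void storeTruncV4i8(uint8_t *dst, uint16x4_t Val) {
  uint16x8_t Wide   = vcombine_u16(Val, vdup_n_u16(0));             // concat with "undef"
  uint8x8_t  Narrow = vmovn_u16(Wide);                              // xtn v0.8b, v0.8h
  uint32_t   Word   = vget_lane_u32(vreinterpret_u32_u8(Narrow), 0);
  std::memcpy(dst, &Word, 4);                                       // str s0, [x0]
}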
2709
2710 // Custom lowering for any store, vector or scalar, default or truncating.
2711 // Currently we only custom lower the truncating store from vector v4i16 to
2712 // v4i8.
2713SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
2714 SelectionDAG &DAG) const {
2715 SDLoc Dl(Op);
2716 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
2717 assert (StoreNode && "Can only custom lower store nodes");
2718
2719 SDValue Value = StoreNode->getValue();
2720
2721 EVT VT = Value.getValueType();
2722 EVT MemVT = StoreNode->getMemoryVT();
2723
2724 assert (VT.isVector() && "Can only custom lower vector store types");
2725
2726 unsigned AS = StoreNode->getAddressSpace();
2727 unsigned Align = StoreNode->getAlignment();
2728 if (Align < MemVT.getStoreSize() &&
2729 !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
2730 return scalarizeVectorStore(StoreNode, DAG);
2731 }
2732
2733 if (StoreNode->isTruncatingStore()) {
2734 return LowerTruncateVectorStore(Dl, StoreNode, VT, MemVT, DAG);
2735 }
2736
2737 return SDValue();
2738}
2739
2740SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2741 SelectionDAG &DAG) const {
2742 LLVM_DEBUG(dbgs() << "Custom lowering: ");
2743 LLVM_DEBUG(Op.dump());
2744
2745 switch (Op.getOpcode()) {
2746 default:
2747 llvm_unreachable("unimplemented operand");
2748 return SDValue();
2749 case ISD::BITCAST:
2750 return LowerBITCAST(Op, DAG);
2751 case ISD::GlobalAddress:
2752 return LowerGlobalAddress(Op, DAG);
2753 case ISD::GlobalTLSAddress:
2754 return LowerGlobalTLSAddress(Op, DAG);
2755 case ISD::SETCC:
2756 return LowerSETCC(Op, DAG);
2757 case ISD::BR_CC:
2758 return LowerBR_CC(Op, DAG);
2759 case ISD::SELECT:
2760 return LowerSELECT(Op, DAG);
2761 case ISD::SELECT_CC:
2762 return LowerSELECT_CC(Op, DAG);
2763 case ISD::JumpTable:
2764 return LowerJumpTable(Op, DAG);
2765 case ISD::ConstantPool:
2766 return LowerConstantPool(Op, DAG);
2767 case ISD::BlockAddress:
2768 return LowerBlockAddress(Op, DAG);
2769 case ISD::VASTART:
2770 return LowerVASTART(Op, DAG);
2771 case ISD::VACOPY:
2772 return LowerVACOPY(Op, DAG);
2773 case ISD::VAARG:
2774 return LowerVAARG(Op, DAG);
2775 case ISD::ADDC:
2776 case ISD::ADDE:
2777 case ISD::SUBC:
2778 case ISD::SUBE:
2779 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2780 case ISD::SADDO:
2781 case ISD::UADDO:
2782 case ISD::SSUBO:
2783 case ISD::USUBO:
2784 case ISD::SMULO:
2785 case ISD::UMULO:
2786 return LowerXALUO(Op, DAG);
2787 case ISD::FADD:
2788 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2789 case ISD::FSUB:
2790 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2791 case ISD::FMUL:
2792 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2793 case ISD::FDIV:
2794 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2795 case ISD::FP_ROUND:
2796 return LowerFP_ROUND(Op, DAG);
2797 case ISD::FP_EXTEND:
2798 return LowerFP_EXTEND(Op, DAG);
2799 case ISD::FRAMEADDR:
2800 return LowerFRAMEADDR(Op, DAG);
2801 case ISD::RETURNADDR:
2802 return LowerRETURNADDR(Op, DAG);
2803 case ISD::INSERT_VECTOR_ELT:
2804 return LowerINSERT_VECTOR_ELT(Op, DAG);
2805 case ISD::EXTRACT_VECTOR_ELT:
2806 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2807 case ISD::BUILD_VECTOR:
2808 return LowerBUILD_VECTOR(Op, DAG);
2809 case ISD::VECTOR_SHUFFLE:
2810 return LowerVECTOR_SHUFFLE(Op, DAG);
2811 case ISD::EXTRACT_SUBVECTOR:
2812 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2813 case ISD::SRA:
2814 case ISD::SRL:
2815 case ISD::SHL:
2816 return LowerVectorSRA_SRL_SHL(Op, DAG);
2817 case ISD::SHL_PARTS:
2818 return LowerShiftLeftParts(Op, DAG);
2819 case ISD::SRL_PARTS:
2820 case ISD::SRA_PARTS:
2821 return LowerShiftRightParts(Op, DAG);
2822 case ISD::CTPOP:
2823 return LowerCTPOP(Op, DAG);
2824 case ISD::FCOPYSIGN:
2825 return LowerFCOPYSIGN(Op, DAG);
2826 case ISD::AND:
2827 return LowerVectorAND(Op, DAG);
2828 case ISD::OR:
2829 return LowerVectorOR(Op, DAG);
2830 case ISD::XOR:
2831 return LowerXOR(Op, DAG);
2832 case ISD::PREFETCH:
2833 return LowerPREFETCH(Op, DAG);
2834 case ISD::SINT_TO_FP:
2835 case ISD::UINT_TO_FP:
2836 return LowerINT_TO_FP(Op, DAG);
2837 case ISD::FP_TO_SINT:
2838 case ISD::FP_TO_UINT:
2839 return LowerFP_TO_INT(Op, DAG);
2840 case ISD::FSINCOS:
2841 return LowerFSINCOS(Op, DAG);
2842 case ISD::FLT_ROUNDS_:
2843 return LowerFLT_ROUNDS_(Op, DAG);
2844 case ISD::MUL:
2845 return LowerMUL(Op, DAG);
2846 case ISD::MULHS:
2847 case ISD::MULHU:
2848 return LowerMULH(Op, DAG);
2849 case ISD::INTRINSIC_WO_CHAIN:
2850 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2851 case ISD::STORE:
2852 return LowerSTORE(Op, DAG);
2853 case ISD::VECREDUCE_ADD:
2854 case ISD::VECREDUCE_SMAX:
2855 case ISD::VECREDUCE_SMIN:
2856 case ISD::VECREDUCE_UMAX:
2857 case ISD::VECREDUCE_UMIN:
2858 case ISD::VECREDUCE_FMAX:
2859 case ISD::VECREDUCE_FMIN:
2860 return LowerVECREDUCE(Op, DAG);
2861 case ISD::ATOMIC_LOAD_SUB:
2862 return LowerATOMIC_LOAD_SUB(Op, DAG);
2863 case ISD::ATOMIC_LOAD_AND:
2864 return LowerATOMIC_LOAD_AND(Op, DAG);
2865 case ISD::DYNAMIC_STACKALLOC:
2866 return LowerDYNAMIC_STACKALLOC(Op, DAG);
2867 }
2868}
2869
2870//===----------------------------------------------------------------------===//
2871// Calling Convention Implementation
2872//===----------------------------------------------------------------------===//
2873
2874#include "AArch64GenCallingConv.inc"
2875
2876/// Selects the correct CCAssignFn for a given CallingConvention value.
2877CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2878 bool IsVarArg) const {
2879 switch (CC) {
2880 default:
2881 report_fatal_error("Unsupported calling convention.");
2882 case CallingConv::WebKit_JS:
2883 return CC_AArch64_WebKit_JS;
2884 case CallingConv::GHC:
2885 return CC_AArch64_GHC;
2886 case CallingConv::C:
2887 case CallingConv::Fast:
2888 case CallingConv::PreserveMost:
2889 case CallingConv::CXX_FAST_TLS:
2890 case CallingConv::Swift:
2891 if (Subtarget->isTargetWindows() && IsVarArg)
2892 return CC_AArch64_Win64_VarArg;
2893 if (!Subtarget->isTargetDarwin())
2894 return CC_AArch64_AAPCS;
2895 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2896 case CallingConv::Win64:
2897 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
2898 }
2899}
2900
2901CCAssignFn *
2902AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2903 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2904 : RetCC_AArch64_AAPCS;
2905}
2906
2907SDValue AArch64TargetLowering::LowerFormalArguments(
2908 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2909 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2910 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2911 MachineFunction &MF = DAG.getMachineFunction();
2912 MachineFrameInfo &MFI = MF.getFrameInfo();
2913 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
2914
2915 // Assign locations to all of the incoming arguments.
2916 SmallVector<CCValAssign, 16> ArgLocs;
2917 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2918 *DAG.getContext());
2919
2920 // At this point, Ins[].VT may already be promoted to i32. To correctly
2921 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2922 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2923 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2924 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2925 // LocVT.
2926 unsigned NumArgs = Ins.size();
2927 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
2928 unsigned CurArgIdx = 0;
2929 for (unsigned i = 0; i != NumArgs; ++i) {
2930 MVT ValVT = Ins[i].VT;
2931 if (Ins[i].isOrigArg()) {
2932 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2933 CurArgIdx = Ins[i].getOrigArgIndex();
2934
2935 // Get type of the original argument.
2936 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2937 /*AllowUnknown*/ true);
2938 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2939 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2940 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2941 ValVT = MVT::i8;
2942 else if (ActualMVT == MVT::i16)
2943 ValVT = MVT::i16;
2944 }
2945 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2946 bool Res =
2947 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2948 assert(!Res && "Call operand has unhandled type");
2949 (void)Res;
2950 }
2951 assert(ArgLocs.size() == Ins.size());
2952 SmallVector<SDValue, 16> ArgValues;
2953 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2954 CCValAssign &VA = ArgLocs[i];
2955
2956 if (Ins[i].Flags.isByVal()) {
2957 // Byval is used for HFAs in the PCS, but the system should work in a
2958 // non-compliant manner for larger structs.
2959 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2960 int Size = Ins[i].Flags.getByValSize();
2961 unsigned NumRegs = (Size + 7) / 8;
2962
2963 // FIXME: This works on big-endian for composite byvals, which are the common
2964 // case. It should also work for fundamental types too.
2965 unsigned FrameIdx =
2966 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2967 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2968 InVals.push_back(FrameIdxN);
2969
2970 continue;
2971 }
2972
2973 if (VA.isRegLoc()) {
2974 // Arguments stored in registers.
2975 EVT RegVT = VA.getLocVT();
2976
2977 SDValue ArgValue;
2978 const TargetRegisterClass *RC;
2979
2980 if (RegVT == MVT::i32)
2981 RC = &AArch64::GPR32RegClass;
2982 else if (RegVT == MVT::i64)
2983 RC = &AArch64::GPR64RegClass;
2984 else if (RegVT == MVT::f16)
2985 RC = &AArch64::FPR16RegClass;
2986 else if (RegVT == MVT::f32)
2987 RC = &AArch64::FPR32RegClass;
2988 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2989 RC = &AArch64::FPR64RegClass;
2990 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2991 RC = &AArch64::FPR128RegClass;
2992 else
2993 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
2994
2995 // Transform the arguments in physical registers into virtual ones.
2996 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2997 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2998
2999 // If this is an 8, 16 or 32-bit value, it is really passed promoted
3000 // to 64 bits. Insert an assert[sz]ext to capture this, then
3001 // truncate to the right size.
3002 switch (VA.getLocInfo()) {
3003 default:
3004 llvm_unreachable("Unknown loc info!");
3005 case CCValAssign::Full:
3006 break;
3007 case CCValAssign::BCvt:
3008 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
3009 break;
3010 case CCValAssign::AExt:
3011 case CCValAssign::SExt:
3012 case CCValAssign::ZExt:
3013 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
3014 // nodes after our lowering.
3015 assert(RegVT == Ins[i].VT && "incorrect register location selected");
3016 break;
3017 }
3018
3019 InVals.push_back(ArgValue);
3020
3021 } else { // VA.isRegLoc()
3022 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
3023 unsigned ArgOffset = VA.getLocMemOffset();
3024 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
3025
3026 uint32_t BEAlign = 0;
3027 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
3028 !Ins[i].Flags.isInConsecutiveRegs())
3029 BEAlign = 8 - ArgSize;
3030
3031 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
3032
3033 // Create load nodes to retrieve arguments from the stack.
3034 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3035 SDValue ArgValue;
3036
3037 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
3038 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3039 MVT MemVT = VA.getValVT();
3040
3041 switch (VA.getLocInfo()) {
3042 default:
3043 break;
3044 case CCValAssign::BCvt:
3045 MemVT = VA.getLocVT();
3046 break;
3047 case CCValAssign::SExt:
3048 ExtType = ISD::SEXTLOAD;
3049 break;
3050 case CCValAssign::ZExt:
3051 ExtType = ISD::ZEXTLOAD;
3052 break;
3053 case CCValAssign::AExt:
3054 ExtType = ISD::EXTLOAD;
3055 break;
3056 }
3057
3058 ArgValue = DAG.getExtLoad(
3059 ExtType, DL, VA.getLocVT(), Chain, FIN,
3060 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
3061 MemVT);
3062
3063 InVals.push_back(ArgValue);
3064 }
3065 }
3066
3067 // varargs
3068 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3069 if (isVarArg) {
3070 if (!Subtarget->isTargetDarwin() || IsWin64) {
3071 // The AAPCS variadic function ABI is identical to the non-variadic
3072 // one. As a result there may be more arguments in registers and we should
3073 // save them for future reference.
3074 // Win64 variadic functions also pass arguments in registers, but all float
3075 // arguments are passed in integer registers.
3076 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
3077 }
3078
3079 // This will point to the next argument passed via stack.
3080 unsigned StackOffset = CCInfo.getNextStackOffset();
3081 // We currently pass all varargs at 8-byte alignment.
3082 StackOffset = ((StackOffset + 7) & ~7);
3083 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
3084 }
3085
3086 unsigned StackArgSize = CCInfo.getNextStackOffset();
3087 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3088 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
3089 // This is a non-standard ABI so by fiat I say we're allowed to make full
3090 // use of the stack area to be popped, which must be aligned to 16 bytes in
3091 // any case:
3092 StackArgSize = alignTo(StackArgSize, 16);
3093
3094 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
3095 // a multiple of 16.
3096 FuncInfo->setArgumentStackToRestore(StackArgSize);
3097
3098 // This realignment carries over to the available bytes below. Our own
3099 // callers will guarantee the space is free by giving an aligned value to
3100 // CALLSEQ_START.
3101 }
3102 // Even if we're not expected to free up the space, it's useful to know how
3103 // much is there while considering tail calls (because we can reuse it).
3104 FuncInfo->setBytesInStackArgArea(StackArgSize);
3105
3106 return Chain;
3107}
3108
3109void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
3110 SelectionDAG &DAG,
3111 const SDLoc &DL,
3112 SDValue &Chain) const {
3113 MachineFunction &MF = DAG.getMachineFunction();
3114 MachineFrameInfo &MFI = MF.getFrameInfo();
3115 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3116 auto PtrVT = getPointerTy(DAG.getDataLayout());
3117 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
3118
3119 SmallVector<SDValue, 8> MemOps;
3120
3121 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
3122 AArch64::X3, AArch64::X4, AArch64::X5,
3123 AArch64::X6, AArch64::X7 };
3124 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
3125 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
3126
3127 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
3128 int GPRIdx = 0;
3129 if (GPRSaveSize != 0) {
3130 if (IsWin64) {
3131 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
3132 if (GPRSaveSize & 15)
3133 // The extra size here, if triggered, will always be 8.
3134 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
3135 } else
3136 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
3137
3138 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
3139
3140 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
3141 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
3142 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
3143 SDValue Store = DAG.getStore(
3144 Val.getValue(1), DL, Val, FIN,
3145 IsWin64
3146 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
3147 GPRIdx,
3148 (i - FirstVariadicGPR) * 8)
3149 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
3150 MemOps.push_back(Store);
3151 FIN =
3152 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
3153 }
3154 }
3155 FuncInfo->setVarArgsGPRIndex(GPRIdx);
3156 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
3157
3158 if (Subtarget->hasFPARMv8() && !IsWin64) {
3159 static const MCPhysReg FPRArgRegs[] = {
3160 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
3161 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
3162 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
3163 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
3164
3165 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
3166 int FPRIdx = 0;
3167 if (FPRSaveSize != 0) {
3168 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
3169
3170 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3171
3172 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3173 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3174 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3175
3176 SDValue Store = DAG.getStore(
3177 Val.getValue(1), DL, Val, FIN,
3178 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3179 MemOps.push_back(Store);
3180 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3181 DAG.getConstant(16, DL, PtrVT));
3182 }
3183 }
3184 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3185 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3186 }
3187
3188 if (!MemOps.empty()) {
3189 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3190 }
3191}
3192
3193/// LowerCallResult - Lower the result values of a call into the
3194/// appropriate copies out of appropriate physical registers.
3195SDValue AArch64TargetLowering::LowerCallResult(
3196 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3197 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3198 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3199 SDValue ThisVal) const {
3200 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3201 ? RetCC_AArch64_WebKit_JS
3202 : RetCC_AArch64_AAPCS;
3203 // Assign locations to each value returned by this call.
3204 SmallVector<CCValAssign, 16> RVLocs;
3205 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3206 *DAG.getContext());
3207 CCInfo.AnalyzeCallResult(Ins, RetCC);
3208
3209 // Copy all of the result registers out of their specified physreg.
3210 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3211 CCValAssign VA = RVLocs[i];
3212
3213 // Pass 'this' value directly from the argument to return value, to avoid
3214 // reg unit interference
3215 if (i == 0 && isThisReturn) {
3216 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3217 "unexpected return calling convention register assignment");
3218 InVals.push_back(ThisVal);
3219 continue;
3220 }
3221
3222 SDValue Val =
3223 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3224 Chain = Val.getValue(1);
3225 InFlag = Val.getValue(2);
3226
3227 switch (VA.getLocInfo()) {
3228 default:
3229 llvm_unreachable("Unknown loc info!");
3230 case CCValAssign::Full:
3231 break;
3232 case CCValAssign::BCvt:
3233 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3234 break;
3235 }
3236
3237 InVals.push_back(Val);
3238 }
3239
3240 return Chain;
3241}
3242
3243/// Return true if the calling convention is one that we can guarantee TCO for.
3244static bool canGuaranteeTCO(CallingConv::ID CC) {
3245 return CC == CallingConv::Fast;
3246}
3247
3248/// Return true if we might ever do TCO for calls with this calling convention.
3249static bool mayTailCallThisCC(CallingConv::ID CC) {
3250 switch (CC) {
3251 case CallingConv::C:
3252 case CallingConv::PreserveMost:
3253 case CallingConv::Swift:
3254 return true;
3255 default:
3256 return canGuaranteeTCO(CC);
3257 }
3258}
3259
3260bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3261 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3262 const SmallVectorImpl<ISD::OutputArg> &Outs,
3263 const SmallVectorImpl<SDValue> &OutVals,
3264 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3265 if (!mayTailCallThisCC(CalleeCC))
3266 return false;
3267
3268 MachineFunction &MF = DAG.getMachineFunction();
3269 const Function &CallerF = MF.getFunction();
3270 CallingConv::ID CallerCC = CallerF.getCallingConv();
3271 bool CCMatch = CallerCC == CalleeCC;
3272
3273 // Byval parameters hand the function a pointer directly into the stack area
3274 // we want to reuse during a tail call. Working around this *is* possible (see
3275 // X86) but less efficient and uglier in LowerCall.
3276 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3277 e = CallerF.arg_end();
3278 i != e; ++i)
3279 if (i->hasByValAttr())
3280 return false;
3281
3282 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3283 return canGuaranteeTCO(CalleeCC) && CCMatch;
3284
3285 // Externally-defined functions with weak linkage should not be
3286 // tail-called on AArch64 when the OS does not support dynamic
3287 // pre-emption of symbols, as the AAELF spec requires normal calls
3288 // to undefined weak functions to be replaced with a NOP or jump to the
3289 // next instruction. The behaviour of branch instructions in this
3290 // situation (as used for tail calls) is implementation-defined, so we
3291 // cannot rely on the linker replacing the tail call with a return.
3292 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3293 const GlobalValue *GV = G->getGlobal();
3294 const Triple &TT = getTargetMachine().getTargetTriple();
3295 if (GV->hasExternalWeakLinkage() &&
3296 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3297 return false;
3298 }
3299
3300 // Now we search for cases where we can use a tail call without changing the
3301 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3302 // concept.
3303
3304 // I want anyone implementing a new calling convention to think long and hard
3305 // about this assert.
3306 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3307 "Unexpected variadic calling convention");
3308
3309 LLVMContext &C = *DAG.getContext();
3310 if (isVarArg && !Outs.empty()) {
3311 // At least two cases here: if caller is fastcc then we can't have any
3312 // memory arguments (we'd be expected to clean up the stack afterwards). If
3313 // caller is C then we could potentially use its argument area.
3314
3315 // FIXME: for now we take the most conservative of these in both cases:
3316 // disallow all variadic memory operands.
3317 SmallVector<CCValAssign, 16> ArgLocs;
3318 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3319
3320 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3321 for (const CCValAssign &ArgLoc : ArgLocs)
3322 if (!ArgLoc.isRegLoc())
3323 return false;
3324 }
3325
3326 // Check that the call results are passed in the same way.
3327 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3328 CCAssignFnForCall(CalleeCC, isVarArg),
3329 CCAssignFnForCall(CallerCC, isVarArg)))
3330 return false;
3331 // The callee has to preserve all registers the caller needs to preserve.
3332 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3333 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3334 if (!CCMatch) {
3335 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3336 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3337 return false;
3338 }
3339
3340 // Nothing more to check if the callee is taking no arguments
3341 if (Outs.empty())
3342 return true;
3343
3344 SmallVector<CCValAssign, 16> ArgLocs;
3345 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3346
3347 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3348
3349 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3350
3351 // If the stack arguments for this call do not fit into our own save area then
3352 // the call cannot be made tail.
3353 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3354 return false;
3355
3356 const MachineRegisterInfo &MRI = MF.getRegInfo();
3357 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3358 return false;
3359
3360 return true;
3361}
3362
3363SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3364 SelectionDAG &DAG,
3365 MachineFrameInfo &MFI,
3366 int ClobberedFI) const {
3367 SmallVector<SDValue, 8> ArgChains;
3368 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3369 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3370
3371 // Include the original chain at the beginning of the list. When this is
3372 // used by target LowerCall hooks, this helps legalize find the
3373 // CALLSEQ_BEGIN node.
3374 ArgChains.push_back(Chain);
3375
3376 // Add a chain value for each stack argument load that overlaps the clobbered area.
3377 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3378 UE = DAG.getEntryNode().getNode()->use_end();
3379 U != UE; ++U)
3380 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3381 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3382 if (FI->getIndex() < 0) {
3383 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3384 int64_t InLastByte = InFirstByte;
3385 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3386
3387 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3388 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3389 ArgChains.push_back(SDValue(L, 1));
3390 }
3391
3392 // Build a tokenfactor for all the chains.
3393 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3394}
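// Illustrative sketch, not from the LLVM source: the overlap test used in the
// loop above, pulled out as a standalone helper over inclusive byte ranges.
// Two ranges overlap exactly when one range's start falls inside the other.
#include <cstdint>

static bool rangesOverlap(int64_t FirstByte, int64_t LastByte,
                          int64_t InFirstByte, int64_t InLastByte) {
  return (InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
         (FirstByte <= InFirstByte && InFirstByte <= LastByte);
}

// rangesOverlap(0, 7, 4, 11) == true;  rangesOverlap(0, 7, 8, 15) == false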
3395
3396bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3397 bool TailCallOpt) const {
3398 return CallCC == CallingConv::Fast && TailCallOpt;
3399}
3400
3401/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3402/// and add input and output parameter nodes.
3403SDValue
3404AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3405 SmallVectorImpl<SDValue> &InVals) const {
3406 SelectionDAG &DAG = CLI.DAG;
3407 SDLoc &DL = CLI.DL;
3408 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3409 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3410 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3411 SDValue Chain = CLI.Chain;
3412 SDValue Callee = CLI.Callee;
3413 bool &IsTailCall = CLI.IsTailCall;
3414 CallingConv::ID CallConv = CLI.CallConv;
3415 bool IsVarArg = CLI.IsVarArg;
3416
3417 MachineFunction &MF = DAG.getMachineFunction();
3418 bool IsThisReturn = false;
3419
3420 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3421 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3422 bool IsSibCall = false;
3423
3424 if (IsTailCall) {
3425 // Check if it's really possible to do a tail call.
3426 IsTailCall = isEligibleForTailCallOptimization(
3427 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3428 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3429 report_fatal_error("failed to perform tail call elimination on a call "
3430 "site marked musttail");
3431
3432 // A sibling call is one where we're under the usual C ABI and not planning
3433 // to change that but can still do a tail call:
3434 if (!TailCallOpt && IsTailCall)
3435 IsSibCall = true;
3436
3437 if (IsTailCall)
3438 ++NumTailCalls;
3439 }
3440
3441 // Analyze operands of the call, assigning locations to each operand.
3442 SmallVector<CCValAssign, 16> ArgLocs;
3443 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3444 *DAG.getContext());
3445
3446 if (IsVarArg) {
3447 // Handle fixed and variable vector arguments differently.
3448 // Variable vector arguments always go into memory.
3449 unsigned NumArgs = Outs.size();
3450
3451 for (unsigned i = 0; i != NumArgs; ++i) {
3452 MVT ArgVT = Outs[i].VT;
3453 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3454 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3455 /*IsVarArg=*/ !Outs[i].IsFixed);
3456 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3457 assert(!Res && "Call operand has unhandled type");
3458 (void)Res;
3459 }
3460 } else {
3461 // At this point, Outs[].VT may already be promoted to i32. To correctly
3462 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3463 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3464 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3465 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3466 // LocVT.
3467 unsigned NumArgs = Outs.size();
3468 for (unsigned i = 0; i != NumArgs; ++i) {
3469 MVT ValVT = Outs[i].VT;
3470 // Get type of the original argument.
3471 EVT ActualVT = getValueType(DAG.getDataLayout(),
3472 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3473 /*AllowUnknown*/ true);
3474 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3475 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3476 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3477 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3478 ValVT = MVT::i8;
3479 else if (ActualMVT == MVT::i16)
3480 ValVT = MVT::i16;
3481
3482 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3483 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3484 assert(!Res && "Call operand has unhandled type");
3485 (void)Res;
3486 }
3487 }
3488
3489 // Get a count of how many bytes are to be pushed on the stack.
3490 unsigned NumBytes = CCInfo.getNextStackOffset();
3491
3492 if (IsSibCall) {
3493 // Since we're not changing the ABI to make this a tail call, the memory
3494 // operands are already available in the caller's incoming argument space.
3495 NumBytes = 0;
3496 }
3497
3498 // FPDiff is the byte offset of the call's argument area from the callee's.
3499 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3500 // by this amount for a tail call. In a sibling call it must be 0 because the
3501 // caller will deallocate the entire stack and the callee still expects its
3502 // arguments to begin at SP+0. Completely unused for non-tail calls.
3503 int FPDiff = 0;
3504
3505 if (IsTailCall && !IsSibCall) {
3506 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3507
3508 // Since callee will pop argument stack as a tail call, we must keep the
3509 // popped size 16-byte aligned.
3510 NumBytes = alignTo(NumBytes, 16);
3511
3512 // FPDiff will be negative if this tail call requires more space than we
3513 // would automatically have in our incoming argument space. Positive if we
3514 // can actually shrink the stack.
3515 FPDiff = NumReusableBytes - NumBytes;
3516
3517 // The stack pointer must be 16-byte aligned at all times it's used for a
3518 // memory operation, which in practice means at *all* times and in
3519 // particular across call boundaries. Therefore our own arguments started at
3520 // a 16-byte aligned SP and the delta applied for the tail call should
3521 // satisfy the same constraint.
3522 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3523 }
3524
3525 // Adjust the stack pointer for the new arguments...
3526 // These operations are automatically eliminated by the prolog/epilog pass
3527 if (!IsSibCall)
3528 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3529
3530 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3531 getPointerTy(DAG.getDataLayout()));
3532
3533 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3534 SmallVector<SDValue, 8> MemOpChains;
3535 auto PtrVT = getPointerTy(DAG.getDataLayout());
3536
3537 // Walk the register/memloc assignments, inserting copies/loads.
3538 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3539 ++i, ++realArgIdx) {
3540 CCValAssign &VA = ArgLocs[i];
3541 SDValue Arg = OutVals[realArgIdx];
3542 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3543
3544 // Promote the value if needed.
3545 switch (VA.getLocInfo()) {
3546 default:
3547 llvm_unreachable("Unknown loc info!");
3548 case CCValAssign::Full:
3549 break;
3550 case CCValAssign::SExt:
3551 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3552 break;
3553 case CCValAssign::ZExt:
3554 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3555 break;
3556 case CCValAssign::AExt:
3557 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3558 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3559 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3560 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3561 }
3562 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3563 break;
3564 case CCValAssign::BCvt:
3565 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3566 break;
3567 case CCValAssign::FPExt:
3568 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3569 break;
3570 }
3571
3572 if (VA.isRegLoc()) {
3573 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3574 Outs[0].VT == MVT::i64) {
3575 assert(VA.getLocVT() == MVT::i64 &&
3576 "unexpected calling convention register assignment");
3577 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3578 "unexpected use of 'returned'");
3579 IsThisReturn = true;
3580 }
3581 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3582 } else {
3583 assert(VA.isMemLoc());
3584
3585 SDValue DstAddr;
3586 MachinePointerInfo DstInfo;
3587
3588 // FIXME: This works on big-endian for composite byvals, which are the
3589 // common case. It should also work for fundamental types too.
3590 uint32_t BEAlign = 0;
3591 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3592 : VA.getValVT().getSizeInBits();
3593 OpSize = (OpSize + 7) / 8;
3594 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3595 !Flags.isInConsecutiveRegs()) {
3596 if (OpSize < 8)
3597 BEAlign = 8 - OpSize;
3598 }
3599 unsigned LocMemOffset = VA.getLocMemOffset();
3600 int32_t Offset = LocMemOffset + BEAlign;
3601 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3602 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3603
3604 if (IsTailCall) {
3605 Offset = Offset + FPDiff;
3606 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3607
3608 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3609 DstInfo =
3610 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3611
3612 // Make sure any stack arguments overlapping with where we're storing
3613 // are loaded before this eventual operation. Otherwise they'll be
3614 // clobbered.
3615 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3616 } else {
3617 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3618
3619 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3620 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3621 LocMemOffset);
3622 }
3623
3624 if (Outs[i].Flags.isByVal()) {
3625 SDValue SizeNode =
3626 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3627 SDValue Cpy = DAG.getMemcpy(
3628 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3629 /*isVol = */ false, /*AlwaysInline = */ false,
3630 /*isTailCall = */ false,
3631 DstInfo, MachinePointerInfo());
3632
3633 MemOpChains.push_back(Cpy);
3634 } else {
3635 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3636 // promoted to a legal register type i32, we should truncate Arg back to
3637 // i1/i8/i16.
3638 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3639 VA.getValVT() == MVT::i16)
3640 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3641
3642 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3643 MemOpChains.push_back(Store);
3644 }
3645 }
3646 }
3647
3648 if (!MemOpChains.empty())
3649 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3650
3651 // Build a sequence of copy-to-reg nodes chained together with token chain
3652 // and flag operands which copy the outgoing args into the appropriate regs.
3653 SDValue InFlag;
3654 for (auto &RegToPass : RegsToPass) {
3655 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3656 RegToPass.second, InFlag);
3657 InFlag = Chain.getValue(1);
3658 }
3659
3660 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3661 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3662 // node so that legalize doesn't hack it.
3663 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3664 auto GV = G->getGlobal();
3665 if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3666 AArch64II::MO_GOT) {
3667 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3668 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3669 } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3670 assert(Subtarget->isTargetWindows() &&
3671 "Windows is the only supported COFF target");
3672 Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3673 } else {
3674 const GlobalValue *GV = G->getGlobal();
3675 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3676 }
3677 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3678 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3679 Subtarget->isTargetMachO()) {
3680 const char *Sym = S->getSymbol();
3681 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3682 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3683 } else {
3684 const char *Sym = S->getSymbol();
3685 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3686 }
3687 }
3688
3689 // We don't usually want to end the call-sequence here because we would tidy
3690 // the frame up *after* the call. However, in the ABI-changing tail-call case
3691 // we've carefully laid out the parameters so that when sp is reset they'll be
3692 // in the correct location.
3693 if (IsTailCall && !IsSibCall) {
3694 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3695 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3696 InFlag = Chain.getValue(1);
3697 }
3698
3699 std::vector<SDValue> Ops;
3700 Ops.push_back(Chain);
3701 Ops.push_back(Callee);
3702
3703 if (IsTailCall) {
3704 // Each tail call may have to adjust the stack by a different amount, so
3705 // this information must travel along with the operation for eventual
3706 // consumption by emitEpilogue.
3707 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3708 }
3709
3710 // Add argument registers to the end of the list so that they are known live
3711 // into the call.
3712 for (auto &RegToPass : RegsToPass)
3713 Ops.push_back(DAG.getRegister(RegToPass.first,
3714 RegToPass.second.getValueType()));
3715
3716 // Add a register mask operand representing the call-preserved registers.
3717 const uint32_t *Mask;
3718 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3719 if (IsThisReturn) {
3720 // For 'this' returns, use the X0-preserving mask if applicable
3721 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3722 if (!Mask) {
3723 IsThisReturn = false;
3724 Mask = TRI->getCallPreservedMask(MF, CallConv);
3725 }
3726 } else
3727 Mask = TRI->getCallPreservedMask(MF, CallConv);
3728
3729 assert(Mask && "Missing call preserved mask for calling convention");
3730 Ops.push_back(DAG.getRegisterMask(Mask));
3731
3732 if (InFlag.getNode())
3733 Ops.push_back(InFlag);
3734
3735 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3736
3737 // If we're doing a tail call, use a TC_RETURN here rather than an
3738 // actual call instruction.
3739 if (IsTailCall) {
3740 MF.getFrameInfo().setHasTailCall();
3741 return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3742 }
3743
3744 // Returns a chain and a flag for retval copy to use.
3745 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3746 InFlag = Chain.getValue(1);
3747
3748 uint64_t CalleePopBytes =
3749 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3750
3751 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3752 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3753 InFlag, DL);
3754 if (!Ins.empty())
3755 InFlag = Chain.getValue(1);
3756
3757 // Handle result values, copying them out of physregs into vregs that we
3758 // return.
3759 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3760 InVals, IsThisReturn,
3761 IsThisReturn ? OutVals[0] : SDValue());
3762}
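
For reference, a minimal source-level example of a call that typically takes the TC_RETURN path above (a sketch only; the function names are illustrative and the usual sibling-call conditions on calling convention and stack usage must hold):

// Illustrative only: with optimization enabled, the call in tail position below
// is normally lowered to AArch64ISD::TC_RETURN and emitted as "b callee"
// rather than "bl callee" followed by "ret".
long long callee(long long);
long long caller(long long x) {
  return callee(x + 1); // candidate tail call
}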
3763
3764bool AArch64TargetLowering::CanLowerReturn(
3765 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3766 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3767 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3768 ? RetCC_AArch64_WebKit_JS
3769 : RetCC_AArch64_AAPCS;
3770 SmallVector<CCValAssign, 16> RVLocs;
3771 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3772 return CCInfo.CheckReturn(Outs, RetCC);
3773}
3774
3775SDValue
3776AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3777 bool isVarArg,
3778 const SmallVectorImpl<ISD::OutputArg> &Outs,
3779 const SmallVectorImpl<SDValue> &OutVals,
3780 const SDLoc &DL, SelectionDAG &DAG) const {
3781 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3782 ? RetCC_AArch64_WebKit_JS
3783 : RetCC_AArch64_AAPCS;
3784 SmallVector<CCValAssign, 16> RVLocs;
3785 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3786 *DAG.getContext());
3787 CCInfo.AnalyzeReturn(Outs, RetCC);
3788
3789 // Copy the result values into the output registers.
3790 SDValue Flag;
3791 SmallVector<SDValue, 4> RetOps(1, Chain);
3792 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3793 ++i, ++realRVLocIdx) {
3794 CCValAssign &VA = RVLocs[i];
3795 assert(VA.isRegLoc() && "Can only return in registers!");
3796 SDValue Arg = OutVals[realRVLocIdx];
3797
3798 switch (VA.getLocInfo()) {
3799 default:
3800 llvm_unreachable("Unknown loc info!");
3801 case CCValAssign::Full:
3802 if (Outs[i].ArgVT == MVT::i1) {
3803 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3804 // value. This is strictly redundant on Darwin (which uses "zeroext
3805 // i1"), but will be optimised out before ISel.
3806 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3807 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3808 }
3809 break;
3810 case CCValAssign::BCvt:
3811 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3812 break;
3813 }
3814
3815 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3816 Flag = Chain.getValue(1);
3817 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3818 }
3819 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3820 const MCPhysReg *I =
3821 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3822 if (I) {
3823 for (; *I; ++I) {
3824 if (AArch64::GPR64RegClass.contains(*I))
3825 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3826 else if (AArch64::FPR64RegClass.contains(*I))
3827 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3828 else
3829 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
3830 }
3831 }
3832
3833 RetOps[0] = Chain; // Update chain.
3834
3835 // Add the flag if we have it.
3836 if (Flag.getNode())
3837 RetOps.push_back(Flag);
3838
3839 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3840}
3841
3842//===----------------------------------------------------------------------===//
3843// Other Lowering Code
3844//===----------------------------------------------------------------------===//
3845
3846SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3847 SelectionDAG &DAG,
3848 unsigned Flag) const {
3849 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty,
3850 N->getOffset(), Flag);
3851}
3852
3853SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3854 SelectionDAG &DAG,
3855 unsigned Flag) const {
3856 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3857}
3858
3859SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3860 SelectionDAG &DAG,
3861 unsigned Flag) const {
3862 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3863 N->getOffset(), Flag);
3864}
3865
3866SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
3867 SelectionDAG &DAG,
3868 unsigned Flag) const {
3869 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
3870}
3871
3872// (loadGOT sym)
3873template <class NodeTy>
3874SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
3875 unsigned Flags) const {
3876 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
3877 SDLoc DL(N);
3878 EVT Ty = getPointerTy(DAG.getDataLayout());
3879 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
3880 // FIXME: Once remat is capable of dealing with instructions with register
3881 // operands, expand this into two nodes instead of using a wrapper node.
3882 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
3883}
3884
3885// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
3886template <class NodeTy>
3887SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
3888 unsigned Flags) const {
3889 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
3890 SDLoc DL(N);
3891 EVT Ty = getPointerTy(DAG.getDataLayout());
3892 const unsigned char MO_NC = AArch64II::MO_NC;
3893 return DAG.getNode(
3894 AArch64ISD::WrapperLarge, DL, Ty,
3895 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
3896 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
3897 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
3898 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
3899}
3900
3901// (addlow (adrp %hi(sym)) %lo(sym))
3902template <class NodeTy>
3903SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3904 unsigned Flags) const {
3905 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
3906 SDLoc DL(N);
3907 EVT Ty = getPointerTy(DAG.getDataLayout());
3908 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
3909 SDValue Lo = getTargetNode(N, Ty, DAG,
3910 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
3911 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
3912 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
3913}
3914
3915SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
3916 SelectionDAG &DAG) const {
3917 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
3918 const GlobalValue *GV = GN->getGlobal();
3919 const AArch64II::TOF TargetFlags =
3920 (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
3921 : AArch64II::MO_NO_FLAG);
3922 unsigned char OpFlags =
3923 Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
3924
3925 if (OpFlags != AArch64II::MO_NO_FLAG)
3926 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&
3927        "unexpected offset in global node");
3928
3929 // This also catches the large code model case for Darwin.
3930 if ((OpFlags & AArch64II::MO_GOT) != 0) {
3931 return getGOT(GN, DAG, TargetFlags);
3932 }
3933
3934 SDValue Result;
3935 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3936 Result = getAddrLarge(GN, DAG, TargetFlags);
3937 } else {
3938 Result = getAddr(GN, DAG, TargetFlags);
3939 }
3940 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3941 SDLoc DL(GN);
3942 if (GV->hasDLLImportStorageClass())
3943 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3944 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3945 return Result;
3946}
3947
3948/// Convert a TLS address reference into the correct sequence of loads
3949/// and calls to compute the variable's address (for Darwin, currently) and
3950/// return an SDValue containing the final node.
3951
3952/// Darwin only has one TLS scheme which must be capable of dealing with the
3953/// fully general situation, in the worst case. This means:
3954/// + "extern __thread" declaration.
3955/// + Defined in a possibly unknown dynamic library.
3956///
3957/// The general system is that each __thread variable has a [3 x i64] descriptor
3958/// which contains information used by the runtime to calculate the address. The
3959/// only part of this the compiler needs to know about is the first xword, which
3960/// contains a function pointer that must be called with the address of the
3961/// entire descriptor in "x0".
3962///
3963/// Since this descriptor may be in a different unit, in general even the
3964/// descriptor must be accessed via an indirect load. The "ideal" code sequence
3965/// is:
3966/// adrp x0, _var@TLVPPAGE
3967/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
3968/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
3969/// ; the function pointer
3970/// blr x1 ; Uses descriptor address in x0
3971/// ; Address of _var is now in x0.
3972///
3973/// If the address of _var's descriptor *is* known to the linker, then it can
3974/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
3975/// a slight efficiency gain.
3976SDValue
3977AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
3978 SelectionDAG &DAG) const {
3979 assert(Subtarget->isTargetDarwin() &&
3980        "This function expects a Darwin target");
3981
3982 SDLoc DL(Op);
3983 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3984 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3985
3986 SDValue TLVPAddr =
3987 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3988 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
3989
3990 // The first entry in the descriptor is a function pointer that we must call
3991 // to obtain the address of the variable.
3992 SDValue Chain = DAG.getEntryNode();
3993 SDValue FuncTLVGet = DAG.getLoad(
3994 MVT::i64, DL, Chain, DescAddr,
3995 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3996 /* Alignment = */ 8,
3997 MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
3998 MachineMemOperand::MODereferenceable);
3999 Chain = FuncTLVGet.getValue(1);
4000
4001 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4002 MFI.setAdjustsStack(true);
4003
4004 // TLS calls preserve all registers except those that absolutely must be
4005 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
4006 // silly).
4007 const uint32_t *Mask =
4008 Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
4009
4010 // Finally, we can make the call. This is just a degenerate version of a
4011 // normal AArch64 call node: x0 takes the address of the descriptor, and
4012 // returns the address of the variable in this thread.
4013 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
4014 Chain =
4015 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
4016 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
4017 DAG.getRegisterMask(Mask), Chain.getValue(1));
4018 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
4019}
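
As a rough C++ sketch of the [3 x i64] descriptor described in the comment above (an assumption for illustration; the field names follow the common tlv_descriptor convention and are not taken from this file):

#include <cstdint>

// Illustrative model only: the lowering above loads just the first xword
// (the resolver) and calls it with x0 pointing at the whole descriptor.
struct TLVDescriptor {
  void *(*Thunk)(TLVDescriptor *); // 1st xword: resolver, called with &desc in x0
  uintptr_t Key;                   // 2nd xword: used by the runtime
  uintptr_t Offset;                // 3rd xword: used by the runtime
};

inline void *resolveTLV(TLVDescriptor *Desc) {
  return Desc->Thunk(Desc); // yields the variable's address in this thread
}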
4020
4021/// When accessing thread-local variables under either the general-dynamic or
4022/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
4023/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
4024/// is a function pointer to carry out the resolution.
4025///
4026/// The sequence is:
4027/// adrp x0, :tlsdesc:var
4028/// ldr x1, [x0, #:tlsdesc_lo12:var]
4029/// add x0, x0, #:tlsdesc_lo12:var
4030/// .tlsdesccall var
4031/// blr x1
4032/// (TPIDR_EL0 offset now in x0)
4033///
4034 /// The above sequence must be produced unscheduled, so that the linker can
4035 /// optimize/relax it.
4036 /// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
4037 /// whole sequence; it is expanded very late in the compilation flow, to ensure
4038 /// the sequence is emitted exactly as shown above.
4039SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
4040 const SDLoc &DL,
4041 SelectionDAG &DAG) const {
4042 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4043
4044 SDValue Chain = DAG.getEntryNode();
4045 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4046
4047 Chain =
4048 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
4049 SDValue Glue = Chain.getValue(1);
4050
4051 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
4052}
4053
4054SDValue
4055AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
4056 SelectionDAG &DAG) const {
4057 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
4058 assert(Subtarget->useSmallAddressing() &&
4059        "ELF TLS only supported in small memory model");
4060 // Different choices can be made for the maximum size of the TLS area for a
4061 // module. For the small address model, the default TLS size is 16MiB and the
4062 // maximum TLS size is 4GiB.
4063 // FIXME: add -mtls-size command line option and make it control the 16MiB
4064 // vs. 4GiB code sequence generation.
4065 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4066
4067 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
4068
4069 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
4070 if (Model == TLSModel::LocalDynamic)
4071 Model = TLSModel::GeneralDynamic;
4072 }
4073
4074 SDValue TPOff;
4075 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4076 SDLoc DL(Op);
4077 const GlobalValue *GV = GA->getGlobal();
4078
4079 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
4080
4081 if (Model == TLSModel::LocalExec) {
4082 SDValue HiVar = DAG.getTargetGlobalAddress(
4083 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4084 SDValue LoVar = DAG.getTargetGlobalAddress(
4085 GV, DL, PtrVT, 0,
4086 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4087
4088 SDValue TPWithOff_lo =
4089 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
4090 HiVar,
4091 DAG.getTargetConstant(0, DL, MVT::i32)),
4092 0);
4093 SDValue TPWithOff =
4094 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
4095 LoVar,
4096 DAG.getTargetConstant(0, DL, MVT::i32)),
4097 0);
4098 return TPWithOff;
4099 } else if (Model == TLSModel::InitialExec) {
4100 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4101 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
4102 } else if (Model == TLSModel::LocalDynamic) {
4103 // Local-dynamic accesses proceed in two phases: first, a general-dynamic TLS
4104 // descriptor call against the special symbol _TLS_MODULE_BASE_ calculates the
4105 // beginning of the module's TLS region; second, a DTPREL offset calculation
4106 // locates the variable within that region.
4107
4108 // These accesses will need deduplicating if there's more than one.
4109 AArch64FunctionInfo *MFI =
4110 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4111 MFI->incNumLocalDynamicTLSAccesses();
4112
4113 // The call needs a relocation too for linker relaxation. It doesn't make
4114 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4115 // the address.
4116 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
4117 AArch64II::MO_TLS);
4118
4119 // Now we can calculate the offset from TPIDR_EL0 to this module's
4120 // thread-local area.
4121 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4122
4123 // Now use :dtprel_whatever: operations to calculate this variable's offset
4124 // in its thread-storage area.
4125 SDValue HiVar = DAG.getTargetGlobalAddress(
4126 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4127 SDValue LoVar = DAG.getTargetGlobalAddress(
4128 GV, DL, MVT::i64, 0,
4129 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4130
4131 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
4132 DAG.getTargetConstant(0, DL, MVT::i32)),
4133 0);
4134 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
4135 DAG.getTargetConstant(0, DL, MVT::i32)),
4136 0);
4137 } else if (Model == TLSModel::GeneralDynamic) {
4138 // The call needs a relocation too for linker relaxation. It doesn't make
4139 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
4140 // the address.
4141 SDValue SymAddr =
4142 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
4143
4144 // Finally we can make a call to calculate the offset from tpidr_el0.
4145 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
4146 } else
4147 llvm_unreachable("Unsupported ELF TLS access model");
4148
4149 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
4150}
4151
4152SDValue
4153AArch64TargetLowering::LowerWindowsGlobalTLSAddress(SDValue Op,
4154 SelectionDAG &DAG) const {
4155 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
4156
4157 SDValue Chain = DAG.getEntryNode();
4158 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4159 SDLoc DL(Op);
4160
4161 SDValue TEB = DAG.getRegister(AArch64::X18, MVT::i64);
4162
4163 // Load the ThreadLocalStoragePointer from the TEB
4164 // A pointer to the TLS array is located at offset 0x58 from the TEB.
4165 SDValue TLSArray =
4166 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x58, DL));
4167 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
4168 Chain = TLSArray.getValue(1);
4169
4170 // Load the TLS index from the C runtime;
4171 // This does the same as getAddr(), but without having a GlobalAddressSDNode.
4172 // This also does the same as LOADgot, but using a generic i32 load,
4173 // while LOADgot only loads i64.
4174 SDValue TLSIndexHi =
4175 DAG.getTargetExternalSymbol("_tls_index", PtrVT, AArch64II::MO_PAGE);
4176 SDValue TLSIndexLo = DAG.getTargetExternalSymbol(
4177 "_tls_index", PtrVT, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4178 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, PtrVT, TLSIndexHi);
4179 SDValue TLSIndex =
4180 DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, ADRP, TLSIndexLo);
4181 TLSIndex = DAG.getLoad(MVT::i32, DL, Chain, TLSIndex, MachinePointerInfo());
4182 Chain = TLSIndex.getValue(1);
4183
4184 // The pointer to the thread's TLS data area is stored in the TLS array at
4185 // offset TLSIndex * 8, so scale the index by 8 before adding it to TLSArray.
4186 TLSIndex = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TLSIndex);
4187 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
4188 DAG.getConstant(3, DL, PtrVT));
4189 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
4190 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
4191 MachinePointerInfo());
4192 Chain = TLS.getValue(1);
4193
4194 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4195 const GlobalValue *GV = GA->getGlobal();
4196 SDValue TGAHi = DAG.getTargetGlobalAddress(
4197 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
4198 SDValue TGALo = DAG.getTargetGlobalAddress(
4199 GV, DL, PtrVT, 0,
4200 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
4201
4202 // Add the offset from the start of the .tls section (section base).
4203 SDValue Addr =
4204 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TLS, TGAHi,
4205 DAG.getTargetConstant(0, DL, MVT::i32)),
4206 0);
4207 Addr = DAG.getNode(AArch64ISD::ADDlow, DL, PtrVT, Addr, TGALo);
4208 return Addr;
4209}
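
A hedged C++ sketch of the address computation performed above (illustrative only; the TEB pointer, _tls_index value, and section offset are modeled as plain parameters here):

#include <cstdint>

// Illustrative model of the Windows TLS access lowered above:
//   TEB + 0x58           -> ThreadLocalStoragePointer (the TLS array)
//   TLSArray[_tls_index] -> this module's TLS data block
//   block + section_off  -> the variable itself
void *windowsTLSAddress(char *TEB, uint32_t TLSIndex, uint64_t SectionOffset) {
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x58);
  char *TLSBlock = TLSArray[TLSIndex]; // index scaled by 8, as with the SHL above
  return TLSBlock + SectionOffset;     // TGAHi/TGALo add the offset within .tls
}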
4210
4211SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
4212 SelectionDAG &DAG) const {
4213 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
4214 if (DAG.getTarget().useEmulatedTLS())
4215 return LowerToTLSEmulatedModel(GA, DAG);
4216
4217 if (Subtarget->isTargetDarwin())
4218 return LowerDarwinGlobalTLSAddress(Op, DAG);
4219 if (Subtarget->isTargetELF())
4220 return LowerELFGlobalTLSAddress(Op, DAG);
4221 if (Subtarget->isTargetWindows())
4222 return LowerWindowsGlobalTLSAddress(Op, DAG);
4223
4224 llvm_unreachable("Unexpected platform trying to use TLS");
4225}
4226
4227SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4228 SDValue Chain = Op.getOperand(0);
4229 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4230 SDValue LHS = Op.getOperand(2);
4231 SDValue RHS = Op.getOperand(3);
4232 SDValue Dest = Op.getOperand(4);
4233 SDLoc dl(Op);
4234
4235 // Handle f128 first, since lowering it will result in comparing the return
4236 // value of a libcall against zero, which is just what the rest of LowerBR_CC
4237 // is expecting to deal with.
4238 if (LHS.getValueType() == MVT::f128) {
4239 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4240
4241 // If softenSetCCOperands returned a scalar, we need to compare the result
4242 // against zero to select between true and false values.
4243 if (!RHS.getNode()) {
4244 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4245 CC = ISD::SETNE;
4246 }
4247 }
4248
4249 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4250 // instruction.
4251 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4252 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4253 // Only lower legal XALUO ops.
4254 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4255 return SDValue();
4256
4257 // The actual operation with overflow check.
4258 AArch64CC::CondCode OFCC;
4259 SDValue Value, Overflow;
4260 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4261
4262 if (CC == ISD::SETNE)
4263 OFCC = getInvertedCondCode(OFCC);
4264 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4265
4266 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4267 Overflow);
4268 }
4269
4270 if (LHS.getValueType().isInteger()) {
4271 assert((LHS.getValueType() == RHS.getValueType()) &&
4272        (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4273
4274 // If the RHS of the comparison is zero, we can potentially fold this
4275 // to a specialized branch.
4276 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4277 if (RHSC && RHSC->getZExtValue() == 0) {
4278 if (CC == ISD::SETEQ) {
4279 // See if we can use a TBZ to fold in an AND as well.
4280 // TBZ has a smaller branch displacement than CBZ. If the offset is
4281 // out of bounds, a late MI-layer pass rewrites branches.
4282 // 403.gcc is an example that hits this case.
4283 if (LHS.getOpcode() == ISD::AND &&
4284 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4285 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4286 SDValue Test = LHS.getOperand(0);
4287 uint64_t Mask = LHS.getConstantOperandVal(1);
4288 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4289 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4290 Dest);
4291 }
4292
4293 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4294 } else if (CC == ISD::SETNE) {
4295 // See if we can use a TBZ to fold in an AND as well.
4296 // TBZ has a smaller branch displacement than CBZ. If the offset is
4297 // out of bounds, a late MI-layer pass rewrites branches.
4298 // 403.gcc is an example that hits this case.
4299 if (LHS.getOpcode() == ISD::AND &&
4300 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4301 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4302 SDValue Test = LHS.getOperand(0);
4303 uint64_t Mask = LHS.getConstantOperandVal(1);
4304 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4305 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4306 Dest);
4307 }
4308
4309 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4310 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4311 // Don't combine AND since emitComparison converts the AND to an ANDS
4312 // (a.k.a. TST) and the test in the test bit and branch instruction
4313 // becomes redundant. This would also increase register pressure.
4314 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4315 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4316 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4317 }
4318 }
4319 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4320 LHS.getOpcode() != ISD::AND) {
4321 // Don't combine AND since emitComparison converts the AND to an ANDS
4322 // (a.k.a. TST) and the test in the test bit and branch instruction
4323 // becomes redundant. This would also increase register pressure.
4324 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4325 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4326 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4327 }
4328
4329 SDValue CCVal;
4330 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4331 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4332 Cmp);
4333 }
4334
4335 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4336        LHS.getValueType() == MVT::f64);
4337
4338 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4339 // clean. Some of them require two branches to implement.
4340 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4341 AArch64CC::CondCode CC1, CC2;
4342 changeFPCCToAArch64CC(CC, CC1, CC2);
4343 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4344 SDValue BR1 =
4345 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4346 if (CC2 != AArch64CC::AL) {
4347 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4348 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4349 Cmp);
4350 }
4351
4352 return BR1;
4353}
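
For the AND-into-TBZ/TBNZ fold above, a minimal source-level example (a sketch; the function name is illustrative). With a power-of-two mask such as 0x8, the equality test against zero becomes a single test-bit-and-branch on bit Log2_64(Mask):

// Illustrative only: the conditional branch on (x & 0x8) == 0 is typically
// lowered to "tbz x0, #3, <target>" rather than an and + cbz pair.
int branchOnBitThree(unsigned long long x) {
  if ((x & 0x8ULL) == 0)
    return 1;
  return 2;
}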
4354
4355SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4356 SelectionDAG &DAG) const {
4357 EVT VT = Op.getValueType();
4358 SDLoc DL(Op);
4359
4360 SDValue In1 = Op.getOperand(0);
4361 SDValue In2 = Op.getOperand(1);
4362 EVT SrcVT = In2.getValueType();
4363
4364 if (SrcVT.bitsLT(VT))
4365 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4366 else if (SrcVT.bitsGT(VT))
4367 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4368
4369 EVT VecVT;
4370 uint64_t EltMask;
4371 SDValue VecVal1, VecVal2;
4372
4373 auto setVecVal = [&] (int Idx) {
4374 if (!VT.isVector()) {
4375 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4376 DAG.getUNDEF(VecVT), In1);
4377 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4378 DAG.getUNDEF(VecVT), In2);
4379 } else {
4380 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4381 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4382 }
4383 };
4384
4385 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4386 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4387 EltMask = 0x80000000ULL;
4388 setVecVal(AArch64::ssub);
4389 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4390 VecVT = MVT::v2i64;
4391
4392 // We want to materialize a mask with the high bit set, but the AdvSIMD
4393 // immediate moves cannot materialize that in a single instruction for
4394 // 64-bit elements. Instead, materialize zero and then negate it.
4395 EltMask = 0;
4396
4397 setVecVal(AArch64::dsub);
4398 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4399 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4400 EltMask = 0x8000ULL;
4401 setVecVal(AArch64::hsub);
4402 } else {
4403 llvm_unreachable("Invalid type for copysign!");
4404 }
4405
4406 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4407
4408 // If we couldn't materialize the mask above, then the mask vector will be
4409 // the zero vector, and we need to negate it here.
4410 if (VT == MVT::f64 || VT == MVT::v2f64) {
4411 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4412 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4413 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4414 }
4415
4416 SDValue Sel =
4417 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4418
4419 if (VT == MVT::f16)
4420 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4421 if (VT == MVT::f32)
4422 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4423 else if (VT == MVT::f64)
4424 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4425 else
4426 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4427}
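
A scalar model of the BIT-based select above, shown for the f64 case (an illustrative sketch, not part of this file):

#include <cstdint>
#include <cstring>

// Illustrative only: AArch64 BIT inserts bits from the second source wherever
// the mask is set; with the sign-bit mask this implements copysign.
double copysignViaBitSelect(double Mag, double Sign) {
  uint64_t M, S;
  std::memcpy(&M, &Mag, sizeof(M));
  std::memcpy(&S, &Sign, sizeof(S));
  const uint64_t SignMask = 0x8000000000000000ULL; // built above via FNEG of 0.0
  uint64_t R = (M & ~SignMask) | (S & SignMask);
  std::memcpy(&Mag, &R, sizeof(R));
  return Mag;
}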
4428
4429SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4430 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
4431 Attribute::NoImplicitFloat))
4432 return SDValue();
4433
4434 if (!Subtarget->hasNEON())
4435 return SDValue();
4436
4437 // While there is no integer popcount instruction, it can
4438 // be more efficiently lowered to the following sequence that uses
4439 // AdvSIMD registers/instructions as long as the copies to/from
4440 // the AdvSIMD registers are cheap.
4441 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4442 // CNT V0.8B, V0.8B // 8xbyte pop-counts
4443 // ADDV B0, V0.8B // sum 8xbyte pop-counts
4444 // UMOV X0, V0.B[0] // copy byte result back to integer reg
4445 SDValue Val = Op.getOperand(0);
4446 SDLoc DL(Op);
4447 EVT VT = Op.getValueType();
4448
4449 if (VT == MVT::i32)
4450 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4451 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4452
4453 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4454 SDValue UaddLV = DAG.getNode(
4455 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4456 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4457
4458 if (VT == MVT::i64)
4459 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4460 return UaddLV;
4461}
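
A scalar model of the CNT + ADDV sequence described above (illustrative sketch only):

#include <cstdint>

// Illustrative only: per-byte pop-counts (CNT V0.8B) followed by a horizontal
// sum (ADDV B0) give the 64-bit population count.
unsigned popcount64ViaBytes(uint64_t X) {
  unsigned Sum = 0;
  for (int I = 0; I < 8; ++I) {
    uint8_t Byte = (X >> (8 * I)) & 0xff;
    unsigned Cnt = 0;
    for (; Byte; Byte &= Byte - 1) // clear the lowest set bit each iteration
      ++Cnt;
    Sum += Cnt; // ADDV: sum of the eight byte counts
  }
  return Sum;
}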
4462
4463SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4464
4465 if (Op.getValueType().isVector())
4466 return LowerVSETCC(Op, DAG);
4467
4468 SDValue LHS = Op.getOperand(0);
4469 SDValue RHS = Op.getOperand(1);
4470 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4471 SDLoc dl(Op);
4472
4473 // We chose ZeroOrOneBooleanContents, so use zero and one.
4474 EVT VT = Op.getValueType();
4475 SDValue TVal = DAG.getConstant(1, dl, VT);
4476 SDValue FVal = DAG.getConstant(0, dl, VT);
4477
4478 // Handle f128 first, since one possible outcome is a normal integer
4479 // comparison which gets picked up by the next if statement.
4480 if (LHS.getValueType() == MVT::f128) {
4481 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4482
4483 // If softenSetCCOperands returned a scalar, use it.
4484 if (!RHS.getNode()) {
4485 assert(LHS.getValueType() == Op.getValueType() &&
4486        "Unexpected setcc expansion!");
4487 return LHS;
4488 }
4489 }
4490
4491 if (LHS.getValueType().isInteger()) {
4492 SDValue CCVal;
4493 SDValue Cmp =
4494 getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4495
4496 // Note that we inverted the condition above, so we reverse the order of
4497 // the true and false operands here. This will allow the setcc to be
4498 // matched to a single CSINC instruction.
4499 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4500 }
4501
4502 // Now we know we're dealing with FP values.
4503 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4504        LHS.getValueType() == MVT::f64);
4505
4506 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4507 // and do the comparison.
4508 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4509
4510 AArch64CC::CondCode CC1, CC2;
4511 changeFPCCToAArch64CC(CC, CC1, CC2);
4512 if (CC2 == AArch64CC::AL) {
4513 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4514 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4515
4516 // Note that we inverted the condition above, so we reverse the order of
4517 // the true and false operands here. This will allow the setcc to be
4518 // matched to a single CSINC instruction.
4519 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4520 } else {
4521 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4522 // totally clean. Some of them require two CSELs to implement. In such
4523 // cases, we emit the first CSEL and then emit a second using the output
4524 // of the first as the RHS. We're effectively OR'ing the two CC's together.
4525
4526 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4527 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4528 SDValue CS1 =
4529 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4530
4531 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4532 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4533 }
4534}
4535
4536SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4537 SDValue RHS, SDValue TVal,
4538 SDValue FVal, const SDLoc &dl,
4539 SelectionDAG &DAG) const {
4540 // Handle f128 first, because it will result in a comparison of some RTLIB
4541 // call result against zero.
4542 if (LHS.getValueType() == MVT::f128) {
4543 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4544
4545 // If softenSetCCOperands returned a scalar, we need to compare the result
4546 // against zero to select between true and false values.
4547 if (!RHS.getNode()) {
4548 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4549 CC = ISD::SETNE;
4550 }
4551 }
4552
4553 // Also handle f16, for which we need to do a f32 comparison.
4554 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4555 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4556 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4557 }
4558
4559 // Next, handle integers.
4560 if (LHS.getValueType().isInteger()) {
4561 assert((LHS.getValueType() == RHS.getValueType()) &&
4562        (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4563
4564 unsigned Opcode = AArch64ISD::CSEL;
4565
4566 // If both the TVal and the FVal are constants, see if we can swap them in
4567 // order to form a CSINV or CSINC out of them.
4568 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4569 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4570
4571 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4572 std::swap(TVal, FVal);
4573 std::swap(CTVal, CFVal);
4574 CC = ISD::getSetCCInverse(CC, true);
4575 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4576 std::swap(TVal, FVal);
4577 std::swap(CTVal, CFVal);
4578 CC = ISD::getSetCCInverse(CC, true);
4579 } else if (TVal.getOpcode() == ISD::XOR) {
4580 // If TVal is a NOT we want to swap TVal and FVal so that we can match
4581 // with a CSINV rather than a CSEL.
4582 if (isAllOnesConstant(TVal.getOperand(1))) {
4583 std::swap(TVal, FVal);
4584 std::swap(CTVal, CFVal);
4585 CC = ISD::getSetCCInverse(CC, true);
4586 }
4587 } else if (TVal.getOpcode() == ISD::SUB) {
4588 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4589 // that we can match with a CSNEG rather than a CSEL.
4590 if (isNullConstant(TVal.getOperand(0))) {
4591 std::swap(TVal, FVal);
4592 std::swap(CTVal, CFVal);
4593 CC = ISD::getSetCCInverse(CC, true);
4594 }
4595 } else if (CTVal && CFVal) {
4596 const int64_t TrueVal = CTVal->getSExtValue();
4597 const int64_t FalseVal = CFVal->getSExtValue();
4598 bool Swap = false;
4599
4600 // If both TVal and FVal are constants, see if FVal is the
4601 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4602 // instead of a CSEL in that case.
4603 if (TrueVal == ~FalseVal) {
4604 Opcode = AArch64ISD::CSINV;
4605 } else if (TrueVal == -FalseVal) {
4606 Opcode = AArch64ISD::CSNEG;
4607 } else if (TVal.getValueType() == MVT::i32) {
4608 // If our operands are only 32-bit wide, make sure we use 32-bit
4609 // arithmetic for the check whether we can use CSINC. This ensures that
4610 // the addition in the check will wrap around properly in case there is
4611 // an overflow (which would not be the case if we do the check with
4612 // 64-bit arithmetic).
4613 const uint32_t TrueVal32 = CTVal->getZExtValue();
4614 const uint32_t FalseVal32 = CFVal->getZExtValue();
4615
4616 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4617 Opcode = AArch64ISD::CSINC;
4618
4619 if (TrueVal32 > FalseVal32) {
4620 Swap = true;
4621 }
4622 }
4623 // 64-bit check whether we can use CSINC.
4624 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4625 Opcode = AArch64ISD::CSINC;
4626
4627 if (TrueVal > FalseVal) {
4628 Swap = true;
4629 }
4630 }
4631
4632 // Swap TVal and FVal if necessary.
4633 if (Swap) {
4634 std::swap(TVal, FVal);
4635 std::swap(CTVal, CFVal);
4636 CC = ISD::getSetCCInverse(CC, true);
4637 }
4638
4639 if (Opcode != AArch64ISD::CSEL) {
4640 // Drop FVal since we can get its value by simply inverting/negating
4641 // TVal.
4642 FVal = TVal;
4643 }
4644 }
4645
4646 // Avoid materializing a constant when possible by reusing a known value in
4647 // a register. However, don't perform this optimization if the known value
4648 // is one, zero or negative one in the case of a CSEL. We can always
4649 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4650 // FVal, respectively.
4651 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4652 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4653 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4654 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4655 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4656 // "a != C ? x : a" to avoid materializing C.
4657 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4658 TVal = LHS;
4659 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4660 FVal = LHS;
4661 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4662 assert(CTVal && CFVal && "Expected constant operands for CSNEG.");
4663 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4664 // avoid materializing C.
4665 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4666 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4667 Opcode = AArch64ISD::CSINV;
4668 TVal = LHS;
4669 FVal = DAG.getConstant(0, dl, FVal.getValueType());
4670 }
4671 }
4672
4673 SDValue CCVal;
4674 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4675 EVT VT = TVal.getValueType();
4676 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4677 }
4678
4679 // Now we know we're dealing with FP values.
4680 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4681        LHS.getValueType() == MVT::f64);
4682 assert(LHS.getValueType() == RHS.getValueType());
4683 EVT VT = TVal.getValueType();
4684 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4685
4686 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4687 // clean. Some of them require two CSELs to implement.
4688 AArch64CC::CondCode CC1, CC2;
4689 changeFPCCToAArch64CC(CC, CC1, CC2);
4690
4691 if (DAG.getTarget().Options.UnsafeFPMath) {
4692 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4693 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4694 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4695 if (RHSVal && RHSVal->isZero()) {
4696 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4697 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4698
4699 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4700 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4701 TVal = LHS;
4702 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4703 CFVal && CFVal->isZero() &&
4704 FVal.getValueType() == LHS.getValueType())
4705 FVal = LHS;
4706 }
4707 }
4708
4709 // Emit first, and possibly only, CSEL.
4710 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4711 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4712
4713 // If we need a second CSEL, emit it, using the output of the first as the
4714 // RHS. We're effectively OR'ing the two CC's together.
4715 if (CC2 != AArch64CC::AL) {
4716 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4717 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4718 }
4719
4720 // Otherwise, return the output of the first CSEL.
4721 return CS1;
4722}
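
For reference, the scalar semantics of the conditional-select forms targeted above, written as small C++ helpers (illustrative names, not LLVM APIs):

// Illustrative only: what the selected AArch64 instructions compute.
long long cselModel(bool Cond, long long A, long long B)  { return Cond ? A : B; }     // CSEL
long long csincModel(bool Cond, long long A, long long B) { return Cond ? A : B + 1; } // CSINC
long long csinvModel(bool Cond, long long A, long long B) { return Cond ? A : ~B; }    // CSINV
long long csnegModel(bool Cond, long long A, long long B) { return Cond ? A : -B; }    // CSNEG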
4723
4724SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4725 SelectionDAG &DAG) const {
4726 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4727 SDValue LHS = Op.getOperand(0);
4728 SDValue RHS = Op.getOperand(1);
4729 SDValue TVal = Op.getOperand(2);
4730 SDValue FVal = Op.getOperand(3);
4731 SDLoc DL(Op);
4732 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4733}
4734
4735SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4736 SelectionDAG &DAG) const {
4737 SDValue CCVal = Op->getOperand(0);
4738 SDValue TVal = Op->getOperand(1);
4739 SDValue FVal = Op->getOperand(2);
4740 SDLoc DL(Op);
4741
4742 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4743 // instruction.
4744 if (isOverflowIntrOpRes(CCVal)) {
4745 // Only lower legal XALUO ops.
4746 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4747 return SDValue();
4748
4749 AArch64CC::CondCode OFCC;
4750 SDValue Value, Overflow;
4751 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4752 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4753
4754 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4755 CCVal, Overflow);
4756 }
4757
4758 // Lower it the same way as we would lower a SELECT_CC node.
4759 ISD::CondCode CC;
4760 SDValue LHS, RHS;
4761 if (CCVal.getOpcode() == ISD::SETCC) {
4762 LHS = CCVal.getOperand(0);
4763 RHS = CCVal.getOperand(1);
4764 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4765 } else {
4766 LHS = CCVal;
4767 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4768 CC = ISD::SETNE;
4769 }
4770 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4771}
4772
4773SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4774 SelectionDAG &DAG) const {
4775 // Jump table entries are PC-relative offsets. No additional tweaking
4776 // is necessary here. Just get the address of the jump table.
4777 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4778
4779 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4780 !Subtarget->isTargetMachO()) {
4781 return getAddrLarge(JT, DAG);
4782 }
4783 return getAddr(JT, DAG);
4784}
4785
4786SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4787 SelectionDAG &DAG) const {
4788 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4789
4790 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4791 // Use the GOT for the large code model on iOS.
4792 if (Subtarget->isTargetMachO()) {
4793 return getGOT(CP, DAG);
4794 }
4795 return getAddrLarge(CP, DAG);
4796 } else {
4797 return getAddr(CP, DAG);
4798 }
4799}
4800
4801SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4802 SelectionDAG &DAG) const {
4803 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
4804 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4805 !Subtarget->isTargetMachO()) {
4806 return getAddrLarge(BA, DAG);
4807 } else {
4808 return getAddr(BA, DAG);
4809 }
4810}
4811
4812SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4813 SelectionDAG &DAG) const {
4814 AArch64FunctionInfo *FuncInfo =
4815 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4816
4817 SDLoc DL(Op);
4818 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4819 getPointerTy(DAG.getDataLayout()));
4820 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4821 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4822 MachinePointerInfo(SV));
4823}
4824
4825SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
4826 SelectionDAG &DAG) const {
4827 AArch64FunctionInfo *FuncInfo =
4828 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4829
4830 SDLoc DL(Op);
4831 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
4832 ? FuncInfo->getVarArgsGPRIndex()
4833 : FuncInfo->getVarArgsStackIndex(),
4834 getPointerTy(DAG.getDataLayout()));
4835 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4836 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4837 MachinePointerInfo(SV));
4838}
4839
4840SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
4841 SelectionDAG &DAG) const {
4842 // The layout of the va_list struct is specified in the AArch64 Procedure Call
4843 // Standard, section B.3.
4844 MachineFunction &MF = DAG.getMachineFunction();
4845 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4846 auto PtrVT = getPointerTy(DAG.getDataLayout());
4847 SDLoc DL(Op);
4848
4849 SDValue Chain = Op.getOperand(0);
4850 SDValue VAList = Op.getOperand(1);
4851 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4852 SmallVector<SDValue, 4> MemOps;
4853
4854 // void *__stack at offset 0
4855 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
4856 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
4857 MachinePointerInfo(SV), /* Alignment = */ 8));
4858
4859 // void *__gr_top at offset 8
4860 int GPRSize = FuncInfo->getVarArgsGPRSize();
4861 if (GPRSize > 0) {
4862 SDValue GRTop, GRTopAddr;
4863
4864 GRTopAddr =
4865 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
4866
4867 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
4868 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
4869 DAG.getConstant(GPRSize, DL, PtrVT));
4870
4871 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
4872 MachinePointerInfo(SV, 8),
4873 /* Alignment = */ 8));
4874 }
4875
4876 // void *__vr_top at offset 16
4877 int FPRSize = FuncInfo->getVarArgsFPRSize();
4878 if (FPRSize > 0) {
4879 SDValue VRTop, VRTopAddr;
4880 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4881 DAG.getConstant(16, DL, PtrVT));
4882
4883 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
4884 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
4885 DAG.getConstant(FPRSize, DL, PtrVT));
4886
4887 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
4888 MachinePointerInfo(SV, 16),
4889 /* Alignment = */ 8));
4890 }
4891
4892 // int __gr_offs at offset 24
4893 SDValue GROffsAddr =
4894 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
4895 MemOps.push_back(DAG.getStore(
4896 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
4897 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
4898
4899 // int __vr_offs at offset 28
4900 SDValue VROffsAddr =
4901 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
4902 MemOps.push_back(DAG.getStore(
4903 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
4904 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
4905
4906 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4907}
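
// For reference, a minimal sketch (not part of this file) of the va_list
// record that the five stores above populate; the field names follow AAPCS64
// section B.3 and the offsets match the constants used in LowerAAPCS_VASTART.
struct AAPCS64VaList {
  void *__stack;   // offset 0:  next stacked argument
  void *__gr_top;  // offset 8:  end of the saved general-register area
  void *__vr_top;  // offset 16: end of the saved FP/SIMD-register area
  int   __gr_offs; // offset 24: stored as -GPRSize above
  int   __vr_offs; // offset 28: stored as -FPRSize above
};
static_assert(sizeof(AAPCS64VaList) == 32, "AAPCS64 va_list is 32 bytes");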
4908
4909SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
4910 SelectionDAG &DAG) const {
4911 MachineFunction &MF = DAG.getMachineFunction();
4912
4913 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
4914 return LowerWin64_VASTART(Op, DAG);
4915 else if (Subtarget->isTargetDarwin())
4916 return LowerDarwin_VASTART(Op, DAG);
4917 else
4918 return LowerAAPCS_VASTART(Op, DAG);
4919}
4920
4921SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
4922 SelectionDAG &DAG) const {
4923 // AAPCS has three pointers and two ints (= 32 bytes); Darwin and Windows
4924 // have a single pointer.
4925 SDLoc DL(Op);
4926 unsigned VaListSize =
4927 Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
4928 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4929 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4930
4931 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
4932 Op.getOperand(2),
4933 DAG.getConstant(VaListSize, DL, MVT::i32),
4934 8, false, false, false, MachinePointerInfo(DestSV),
4935 MachinePointerInfo(SrcSV));
4936}
4937
4938SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
4939 assert(Subtarget->isTargetDarwin() &&
4940 "automatic va_arg instruction only works on Darwin");
4941
4942 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4943 EVT VT = Op.getValueType();
4944 SDLoc DL(Op);
4945 SDValue Chain = Op.getOperand(0);
4946 SDValue Addr = Op.getOperand(1);
4947 unsigned Align = Op.getConstantOperandVal(3);
4948 auto PtrVT = getPointerTy(DAG.getDataLayout());
4949
4950 SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
4951 Chain = VAList.getValue(1);
4952
4953 if (Align > 8) {
4954 assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2");
4955 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4956 DAG.getConstant(Align - 1, DL, PtrVT));
4957 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
4958 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
4959 }
4960
4961 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
4962 uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
4963
4964 // Scalar integer and FP values smaller than 64 bits are implicitly extended
4965 // up to 64 bits. At the very least, we have to increase the striding of the
4966 // vaargs list to match this, and for FP values we need to introduce
4967 // FP_ROUND nodes as well.
4968 if (VT.isInteger() && !VT.isVector())
4969 ArgSize = 8;
4970 bool NeedFPTrunc = false;
4971 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
4972 ArgSize = 8;
4973 NeedFPTrunc = true;
4974 }
4975
4976 // Increment the pointer, VAList, to the next vaarg
4977 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4978 DAG.getConstant(ArgSize, DL, PtrVT));
4979 // Store the incremented VAList to the legalized pointer
4980 SDValue APStore =
4981 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
4982
4983 // Load the actual argument out of the pointer VAList
4984 if (NeedFPTrunc) {
4985 // Load the value as an f64.
4986 SDValue WideFP =
4987 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
4988 // Round the value down to an f32.
4989 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
4990 DAG.getIntPtrConstant(1, DL));
4991 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
4992 // Merge the rounded value with the chain output of the load.
4993 return DAG.getMergeValues(Ops, DL);
4994 }
4995
4996 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
4997}
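
// A host-side sketch (illustrative only, not part of this file) of the pointer
// arithmetic emitted above for a Darwin va_arg of 'float': the slot is 8 bytes
// wide, holds an f64, and the loaded value is narrowed afterwards (NeedFPTrunc).
#include <cstring>

static float darwinVaArgFloat(char *&ap /* the single Darwin va_list pointer */) {
  // (No over-alignment here, so the "Align > 8" round-up step is skipped.)
  double Wide;
  std::memcpy(&Wide, ap, sizeof(double)); // load the f64 slot
  ap += 8;                                // advance by the 8-byte stride (ArgSize)
  return static_cast<float>(Wide);        // the FP_ROUND step
}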
4998
4999SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
5000 SelectionDAG &DAG) const {
5001 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
5002 MFI.setFrameAddressIsTaken(true);
5003
5004 EVT VT = Op.getValueType();
5005 SDLoc DL(Op);
5006 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5007 SDValue FrameAddr =
5008 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
5009 while (Depth--)
5010 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
5011 MachinePointerInfo());
5012 return FrameAddr;
5013}
5014
5015// FIXME? Maybe this could be a TableGen attribute on some registers and
5016// this table could be generated automatically from RegInfo.
5017unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
5018 SelectionDAG &DAG) const {
5019 unsigned Reg = StringSwitch<unsigned>(RegName)
5020 .Case("sp", AArch64::SP)
5021 .Case("x18", AArch64::X18)
5022 .Case("w18", AArch64::W18)
5023 .Case("x20", AArch64::X20)
5024 .Case("w20", AArch64::W20)
5025 .Default(0);
5026 if (((Reg == AArch64::X18 || Reg == AArch64::W18) &&
5027 !Subtarget->isX18Reserved()) ||
5028 ((Reg == AArch64::X20 || Reg == AArch64::W20) &&
5029 !Subtarget->isX20Reserved()))
5030 Reg = 0;
5031 if (Reg)
5032 return Reg;
5033 report_fatal_error(Twine("Invalid register name \""
5034 + StringRef(RegName) + "\"."));
5035}
5036
5037SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
5038 SelectionDAG &DAG) const {
5039 MachineFunction &MF = DAG.getMachineFunction();
5040 MachineFrameInfo &MFI = MF.getFrameInfo();
5041 MFI.setReturnAddressIsTaken(true);
5042
5043 EVT VT = Op.getValueType();
5044 SDLoc DL(Op);
5045 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
5046 if (Depth) {
5047 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
5048 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
5049 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
5050 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
5051 MachinePointerInfo());
5052 }
5053
5054 // Return LR, which contains the return address. Mark it an implicit live-in.
5055 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
5056 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
5057}
5058
5059/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
5060/// i64 values and take a 2 x i64 value to shift plus a shift amount.
5061SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
5062 SelectionDAG &DAG) const {
5063 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5064 EVT VT = Op.getValueType();
5065 unsigned VTBits = VT.getSizeInBits();
5066 SDLoc dl(Op);
5067 SDValue ShOpLo = Op.getOperand(0);
5068 SDValue ShOpHi = Op.getOperand(1);
5069 SDValue ShAmt = Op.getOperand(2);
5070 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
5071
5072 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
5073
5074 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5075 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5076 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
5077
5078 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
5079 // is "undef". We wanted 0, so CSEL it directly.
5080 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5081 ISD::SETEQ, dl, DAG);
5082 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5083 HiBitsForLo =
5084 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5085 HiBitsForLo, CCVal, Cmp);
5086
5087 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5088 DAG.getConstant(VTBits, dl, MVT::i64));
5089
5090 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
5091 SDValue LoForNormalShift =
5092 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
5093
5094 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5095 dl, DAG);
5096 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5097 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
5098 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5099 LoForNormalShift, CCVal, Cmp);
5100
5101 // AArch64 shifts larger than the register width are wrapped rather than
5102 // clamped, so we can't just emit "hi >> x".
5103 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
5104 SDValue HiForBigShift =
5105 Opc == ISD::SRA
5106 ? DAG.getNode(Opc, dl, VT, ShOpHi,
5107 DAG.getConstant(VTBits - 1, dl, MVT::i64))
5108 : DAG.getConstant(0, dl, VT);
5109 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5110 HiForNormalShift, CCVal, Cmp);
5111
5112 SDValue Ops[2] = { Lo, Hi };
5113 return DAG.getMergeValues(Ops, dl);
5114}
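
// A scalar sketch (illustrative only) of what the CSEL-based DAG above
// computes for SRL_PARTS: a 128-bit logical right shift of (Hi:Lo) by Amt,
// written so that no individual shift amount ever reaches 64 (0 <= Amt < 128).
#include <cstdint>

static void srlParts(uint64_t Lo, uint64_t Hi, unsigned Amt,
                     uint64_t &OutLo, uint64_t &OutHi) {
  if (Amt >= 64) {                  // the "big shift" path (ExtraShAmt >= 0)
    OutLo = Hi >> (Amt - 64);
    OutHi = 0;                      // an SRA_PARTS variant would use Hi >> 63
  } else {
    // "Hi << 64" would be undefined for Amt == 0; the CSEL on (ShAmt == 0)
    // above exists exactly to substitute 0 in that case.
    uint64_t HiBitsForLo = (Amt == 0) ? 0 : (Hi << (64 - Amt));
    OutLo = (Lo >> Amt) | HiBitsForLo;
    OutHi = Hi >> Amt;
  }
}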
5115
5116/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5117/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
5118SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
5119 SelectionDAG &DAG) const {
5120 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5121 EVT VT = Op.getValueType();
5122 unsigned VTBits = VT.getSizeInBits();
5123 SDLoc dl(Op);
5124 SDValue ShOpLo = Op.getOperand(0);
5125 SDValue ShOpHi = Op.getOperand(1);
5126 SDValue ShAmt = Op.getOperand(2);
5127
5128 assert(Op.getOpcode() == ISD::SHL_PARTS);
5129 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
5130 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
5131 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5132
5133 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
5134 // is "undef". We wanted 0, so CSEL it directly.
5135 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
5136 ISD::SETEQ, dl, DAG);
5137 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
5138 LoBitsForHi =
5139 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
5140 LoBitsForHi, CCVal, Cmp);
5141
5142 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
5143 DAG.getConstant(VTBits, dl, MVT::i64));
5144 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5145 SDValue HiForNormalShift =
5146 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
5147
5148 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5149
5150 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
5151 dl, DAG);
5152 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
5153 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
5154 HiForNormalShift, CCVal, Cmp);
5155
5156 // AArch64 shifts larger than the register width are wrapped rather than
5157 // clamped, so we can't just emit "lo << a" if a is too big.
5158 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
5159 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5160 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
5161 LoForNormalShift, CCVal, Cmp);
5162
5163 SDValue Ops[2] = { Lo, Hi };
5164 return DAG.getMergeValues(Ops, dl);
5165}
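
// The matching scalar sketch for SHL_PARTS (illustrative only): a 128-bit left
// shift of (Hi:Lo) by Amt with every shift amount kept below 64 (Amt < 128).
#include <cstdint>

static void shlParts(uint64_t Lo, uint64_t Hi, unsigned Amt,
                     uint64_t &OutLo, uint64_t &OutHi) {
  if (Amt >= 64) {                  // "big shift": Lo << (Amt - 64) becomes Hi
    OutHi = Lo << (Amt - 64);
    OutLo = 0;
  } else {
    // The CSEL on (ShAmt == 0) above avoids the undefined "Lo >> 64" here.
    uint64_t LoBitsForHi = (Amt == 0) ? 0 : (Lo >> (64 - Amt));
    OutHi = (Hi << Amt) | LoBitsForHi;
    OutLo = Lo << Amt;
  }
}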
5166
5167bool AArch64TargetLowering::isOffsetFoldingLegal(
5168 const GlobalAddressSDNode *GA) const {
5169 // Offsets are folded in the DAG combine rather than here so that we can
5170 // intelligently choose an offset based on the uses.
5171 return false;
5172}
5173
5174bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
5175 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
5176 // FIXME: We should be able to handle f128 as well with a clever lowering.
5177 if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
5178 (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
5179 LLVM_DEBUG(
5180 dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
5181 return true;
5182 }
5183
5184 StringRef FPType;
5185 bool IsLegal = false;
5186 SmallString<128> ImmStrVal;
5187 Imm.toString(ImmStrVal);
5188
5189 if (VT == MVT::f64) {
5190 FPType = "f64";
5191 IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
5192 } else if (VT == MVT::f32) {
5193 FPType = "f32";
5194 IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
5195 } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
5196 FPType = "f16";
5197 IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
5198 }
5199
5200 if (IsLegal) {
5201 LLVM_DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal
5202 << "\n");
5203 return true;
5204 }
5205
5206 if (!FPType.empty())
5207 LLVM_DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal
5208 << "\n");
5209 else
5210 LLVM_DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal
5211 << ": unsupported fp type\n");
5212
5213 return false;
5214}
5215
5216//===----------------------------------------------------------------------===//
5217// AArch64 Optimization Hooks
5218//===----------------------------------------------------------------------===//
5219
5220static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
5221 SDValue Operand, SelectionDAG &DAG,
5222 int &ExtraSteps) {
5223 EVT VT = Operand.getValueType();
5224 if (ST->hasNEON() &&
5225 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
5226 VT == MVT::f32 || VT == MVT::v1f32 ||
5227 VT == MVT::v2f32 || VT == MVT::v4f32)) {
5228 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
5229 // For the reciprocal estimates, convergence is quadratic, so the number
5230 // of digits is doubled after each iteration. In ARMv8, the accuracy of
5231 // the initial estimate is 2^-8. Thus the number of extra steps to refine
5232 // the result for float (23 mantissa bits) is 2 and for double (52
5233 // mantissa bits) is 3.
5234 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
5235
5236 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
5237 }
5238
5239 return SDValue();
5240}
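
// A small worked check (illustrative only) of the step counts picked above:
// starting from roughly 8 correct bits, each refinement step doubles the
// number of correct bits, so float (23 mantissa bits) needs 2 steps
// (8 -> 16 -> 32) and double (52 mantissa bits) needs 3 (8 -> 16 -> 32 -> 64).
static int refinementSteps(int MantissaBits, int InitialBits = 8) {
  int Steps = 0;
  for (int Bits = InitialBits; Bits < MantissaBits; Bits *= 2)
    ++Steps;
  return Steps;
}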
5241
5242SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
5243 SelectionDAG &DAG, int Enabled,
5244 int &ExtraSteps,
5245 bool &UseOneConst,
5246 bool Reciprocal) const {
5247 if (Enabled == ReciprocalEstimate::Enabled ||
5248 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5249 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5250 DAG, ExtraSteps)) {
5251 SDLoc DL(Operand);
5252 EVT VT = Operand.getValueType();
5253
5254 SDNodeFlags Flags;
5255 Flags.setAllowReassociation(true);
5256
5257 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
5258 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
5259 for (int i = ExtraSteps; i > 0; --i) {
5260 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
5261 Flags);
5262 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
5263 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5264 }
5265 if (!Reciprocal) {
5266 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5267 VT);
5268 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5269 SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5270
5271 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5272 // Correct the result if the operand is 0.0.
5273 Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5274 VT, Eq, Operand, Estimate);
5275 }
5276
5277 ExtraSteps = 0;
5278 return Estimate;
5279 }
5280
5281 return SDValue();
5282}
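
// A scalar sketch (illustrative only) of the DAG built above: FRSQRTE supplies
// the initial 1/sqrt(x) estimate and FRSQRTS(a, b) computes (3 - a*b) / 2, so
// each loop trip is one Newton step E = E * (3 - x*E*E) / 2.
static double rsqrtRefine(double X, double Estimate, int ExtraSteps,
                          bool Reciprocal) {
  for (int i = 0; i < ExtraSteps; ++i) {
    double Step = (3.0 - X * Estimate * Estimate) * 0.5; // FRSQRTS(X, E*E)
    Estimate *= Step;                                    // FMUL
  }
  if (!Reciprocal) {
    Estimate *= X;      // turn 1/sqrt(x) into sqrt(x)
    if (X == 0.0)
      Estimate = X;     // the SETEQ/SELECT above keeps sqrt(+/-0) == +/-0
  }
  return Estimate;
}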
5283
5284SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5285 SelectionDAG &DAG, int Enabled,
5286 int &ExtraSteps) const {
5287 if (Enabled == ReciprocalEstimate::Enabled)
5288 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5289 DAG, ExtraSteps)) {
5290 SDLoc DL(Operand);
5291 EVT VT = Operand.getValueType();
5292
5293 SDNodeFlags Flags;
5294 Flags.setAllowReassociation(true);
5295
5296 // Newton reciprocal iteration: E * (2 - X * E)
5297 // AArch64 reciprocal iteration instruction: (2 - M * N)
5298 for (int i = ExtraSteps; i > 0; --i) {
5299 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5300 Estimate, Flags);
5301 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5302 }
5303
5304 ExtraSteps = 0;
5305 return Estimate;
5306 }
5307
5308 return SDValue();
5309}
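
// The matching scalar sketch for the reciprocal (illustrative only): FRECPE
// supplies the initial 1/x estimate and FRECPS(a, b) computes 2 - a*b, so each
// loop trip above is one Newton step E = E * (2 - x*E).
static double recipRefine(double X, double Estimate, int ExtraSteps) {
  for (int i = 0; i < ExtraSteps; ++i)
    Estimate *= (2.0 - X * Estimate); // FRECPS followed by FMUL
  return Estimate;
}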
5310
5311//===----------------------------------------------------------------------===//
5312// AArch64 Inline Assembly Support
5313//===----------------------------------------------------------------------===//
5314
5315// Table of Constraints
5316// TODO: This is the current set of constraints supported by ARM for the
5317// compiler; not all of them may make sense.
5318//
5319// r - A general register
5320// w - An FP/SIMD register of some size in the range v0-v31
5321// x - An FP/SIMD register of some size in the range v0-v15
5322// I - Constant that can be used with an ADD instruction
5323// J - Constant that can be used with a SUB instruction
5324// K - Constant that can be used with a 32-bit logical instruction
5325// L - Constant that can be used with a 64-bit logical instruction
5326// M - Constant that can be used as a 32-bit MOV immediate
5327// N - Constant that can be used as a 64-bit MOV immediate
5328// Q - A memory reference with base register and no offset
5329// S - A symbolic address
5330// Y - Floating point constant zero
5331// Z - Integer constant zero
5332//
5333// Note that general register operands will be output using their 64-bit x
5334// register name, whatever the size of the variable, unless the asm operand
5335// is prefixed by the %w modifier. Floating-point and SIMD register operands
5336// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5337// %q modifier.
5338const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5339 // At this point, we have to lower this constraint to something else, so we
5340 // lower it to an "r" or "w". However, by doing this we will force the result
5341 // to be in register, while the X constraint is much more permissive.
5342 //
5343 // Although we are correct (we are free to emit anything, without
5344 // constraints), we might break use cases that would expect us to be more
5345 // efficient and emit something else.
5346 if (!Subtarget->hasFPARMv8())
5347 return "r";
5348
5349 if (ConstraintVT.isFloatingPoint())
5350 return "w";
5351
5352 if (ConstraintVT.isVector() &&
5353 (ConstraintVT.getSizeInBits() == 64 ||
5354 ConstraintVT.getSizeInBits() == 128))
5355 return "w";
5356
5357 return "r";
5358}
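
// A usage sketch (illustrative only, compiles only when targeting AArch64) of
// a few of the constraints tabulated above, as they appear in GNU extended
// asm; the function names are hypothetical.
static long addImmExample(long a) {
  long r;
  // 'r' general registers (printed as x-registers), 'I' an ADD-style immediate.
  asm("add %0, %1, %2" : "=r"(r) : "r"(a), "I"(42L));
  return r;
}
static double fnegExample(double v) {
  double r;
  // 'w' FP/SIMD registers, printed in d-form via the %d modifier noted above.
  asm("fneg %d0, %d1" : "=w"(r) : "w"(v));
  return r;
}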
5359
5360/// getConstraintType - Given a constraint letter, return the type of
5361/// constraint it is for this target.
5362AArch64TargetLowering::ConstraintType
5363AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5364 if (Constraint.size() == 1) {
5365 switch (Constraint[0]) {
5366 default:
5367 break;
5368 case 'z':
5369 return C_Other;
5370 case 'x':
5371 case 'w':
5372 return C_RegisterClass;
5373 // An address with a single base register. Due to the way we
5374 // currently handle addresses it is the same as 'r'.
5375 case 'Q':
5376 return C_Memory;
5377 case 'S': // A symbolic address
5378 return C_Other;
5379 }
5380 }
5381 return TargetLowering::getConstraintType(Constraint);
5382}
5383
5384/// Examine constraint type and operand type and determine a weight value.
5385/// This object must already have been set up with the operand type
5386/// and the current alternative constraint selected.
5387TargetLowering::ConstraintWeight
5388AArch64TargetLowering::getSingleConstraintMatchWeight(
5389 AsmOperandInfo &info, const char *constraint) const {
5390 ConstraintWeight weight = CW_Invalid;
5391 Value *CallOperandVal = info.CallOperandVal;
5392 // If we don't have a value, we can't do a match,
5393 // but allow it at the lowest weight.
5394 if (!CallOperandVal)
5395 return CW_Default;
5396 Type *type = CallOperandVal->getType();
5397 // Look at the constraint type.
5398 switch (*constraint) {
5399 default:
5400 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5401 break;
5402 case 'x':
5403 case 'w':
5404 if (type->isFloatingPointTy() || type->isVectorTy())
5405 weight = CW_Register;
5406 break;
5407 case 'z':
5408 weight = CW_Constant;
5409 break;
5410 }
5411 return weight;
5412}
5413
5414std::pair<unsigned, const TargetRegisterClass *>
5415AArch64TargetLowering::getRegForInlineAsmConstraint(
5416 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5417 if (Constraint.size() == 1) {
5418 switch (Constraint[0]) {
5419 case 'r':
5420 if (VT.getSizeInBits() == 64)
5421 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5422 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5423 case 'w':
5424 if (VT.getSizeInBits() == 16)
5425 return std::make_pair(0U, &AArch64::FPR16RegClass);
5426 if (VT.getSizeInBits() == 32)
5427 return std::make_pair(0U, &AArch64::FPR32RegClass);
5428 if (VT.getSizeInBits() == 64)
5429 return std::make_pair(0U, &AArch64::FPR64RegClass);
5430 if (VT.getSizeInBits() == 128)
5431 return std::make_pair(0U, &AArch64::FPR128RegClass);
5432 break;
5433 // The instructions that this constraint is designed for can
5434 // only take 128-bit registers so just use that regclass.
5435 case 'x':
5436 if (VT.getSizeInBits() == 128)
5437 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5438 break;
5439 }
5440 }
5441 if (StringRef("{cc}").equals_lower(Constraint))
5442 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5443
5444 // Use the default implementation in TargetLowering to convert the register
5445 // constraint into a member of a register class.
5446 std::pair<unsigned, const TargetRegisterClass *> Res;
5447 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5448
5449 // Not found as a standard register?
5450 if (!Res.second) {
5451 unsigned Size = Constraint.size();
5452 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5453 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5454 int RegNo;
5455 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5456 if (!Failed && RegNo >= 0 && RegNo <= 31) {
5457 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5458 // By default we'll emit v0-v31 for this unless there's a modifier where
5459 // we'll emit the correct register as well.
5460 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5461 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5462 Res.second = &AArch64::FPR64RegClass;
5463 } else {
5464 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5465 Res.second = &AArch64::FPR128RegClass;
5466 }
5467 }
5468 }
5469 }
5470
5471 return Res;
5472}
5473
5474/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5475/// vector. If it is invalid, don't add anything to Ops.
5476void AArch64TargetLowering::LowerAsmOperandForConstraint(
5477 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5478 SelectionDAG &DAG) const {
5479 SDValue Result;
5480
5481 // Currently only support length 1 constraints.
5482 if (Constraint.length() != 1)
5483 return;
5484
5485 char ConstraintLetter = Constraint[0];
5486 switch (ConstraintLetter) {
5487 default:
5488 break;
5489
5490 // This set of constraints deals with valid constants for various instructions.
5491 // Validate and return a target constant for them if we can.
5492 case 'z': {
5493 // 'z' maps to xzr or wzr so it needs an input of 0.
5494 if (!isNullConstant(Op))
5495 return;
5496
5497 if (Op.getValueType() == MVT::i64)
5498 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5499 else
5500 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5501 break;
5502 }
5503 case 'S': {
5504 // An absolute symbolic address or label reference.
5505 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op)) {
5506 Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
5507 GA->getValueType(0));
5508 } else if (const BlockAddressSDNode *BA =
5509 dyn_cast<BlockAddressSDNode>(Op)) {
5510 Result =
5511 DAG.getTargetBlockAddress(BA->getBlockAddress(), BA->getValueType(0));
5512 } else if (const ExternalSymbolSDNode *ES =
5513 dyn_cast<ExternalSymbolSDNode>(Op)) {
5514 Result =
5515 DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0));
5516 } else
5517 return;
5518 break;
5519 }
5520
5521 case 'I':
5522 case 'J':
5523 case 'K':
5524 case 'L':
5525 case 'M':
5526 case 'N':
5527 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5528 if (!C)
5529 return;
5530
5531 // Grab the value and do some validation.
5532 uint64_t CVal = C->getZExtValue();
5533 switch (ConstraintLetter) {
5534 // The I constraint applies only to simple ADD or SUB immediate operands:
5535 // i.e. 0 to 4095 with optional shift by 12
5536 // The J constraint applies only to ADD or SUB immediates that would be
5537 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5538 // instruction [or vice versa], in other words -1 to -4095 with optional
5539 // left shift by 12.
5540 case 'I':
5541 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5542 break;
5543 return;
5544 case 'J': {
5545 uint64_t NVal = -C->getSExtValue();
5546 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5547 CVal = C->getSExtValue();
5548 break;
5549 }
5550 return;
5551 }
5552 // The K and L constraints apply *only* to logical immediates, including
5553 // what used to be the MOVI alias for ORR (though the MOVI alias has now
5554 // been removed and MOV should be used). So these constraints have to
5555 // distinguish between bit patterns that are valid 32-bit or 64-bit
5556 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5557 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5558 // versa.
5559 case 'K':
5560 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5561 break;
5562 return;
5563 case 'L':
5564 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5565 break;
5566 return;
5567 // The M and N constraints are a superset of K and L respectively, for use
5568 // with the MOV (immediate) alias. As well as the logical immediates they
5569 // also match 32 or 64-bit immediates that can be loaded either using a
5570 // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5571 // (M) or 64-bit 0x1234000000000000 (N) etc.
5572 // As a note some of this code is liberally stolen from the asm parser.
5573 case 'M': {
5574 if (!isUInt<32>(CVal))
5575 return;
5576 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5577 break;
5578 if ((CVal & 0xFFFF) == CVal)
5579 break;
5580 if ((CVal & 0xFFFF0000ULL) == CVal)
5581 break;
5582 uint64_t NCVal = ~(uint32_t)CVal;
5583 if ((NCVal & 0xFFFFULL) == NCVal)
5584 break;
5585 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5586 break;
5587 return;
5588 }
5589 case 'N': {
5590 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5591 break;
5592 if ((CVal & 0xFFFFULL) == CVal)
5593 break;
5594 if ((CVal & 0xFFFF0000ULL) == CVal)
5595 break;
5596 if ((CVal & 0xFFFF00000000ULL) == CVal)
5597 break;
5598 if ((CVal & 0xFFFF000000000000ULL) == CVal)
5599 break;
5600 uint64_t NCVal = ~CVal;
5601 if ((NCVal & 0xFFFFULL) == NCVal)
5602 break;
5603 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5604 break;
5605 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5606 break;
5607 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5608 break;
5609 return;
5610 }
5611 default:
5612 return;
5613 }
5614
5615 // All assembler immediates are 64-bit integers.
5616 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5617 break;
5618 }
5619
5620 if (Result.getNode()) {
5621 Ops.push_back(Result);
5622 return;
5623 }
5624
5625 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5626}
5627
5628//===----------------------------------------------------------------------===//
5629// AArch64 Advanced SIMD Support
5630//===----------------------------------------------------------------------===//
5631
5632/// WidenVector - Given a value in the V64 register class, produce the
5633/// equivalent value in the V128 register class.
5634static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5635 EVT VT = V64Reg.getValueType();
5636 unsigned NarrowSize = VT.getVectorNumElements();
5637 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5638 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5639 SDLoc DL(V64Reg);
5640
5641 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5642 V64Reg, DAG.getConstant(0, DL, MVT::i32));
5643}
5644
5645/// getExtFactor - Determine the adjustment factor for the position when
5646/// generating an "extract from vector registers" instruction.
5647static unsigned getExtFactor(SDValue &V) {
5648 EVT EltType = V.getValueType().getVectorElementType();
5649 return EltType.getSizeInBits() / 8;
5650}
5651
5652/// NarrowVector - Given a value in the V128 register class, produce the
5653/// equivalent value in the V64 register class.
5654static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5655 EVT VT = V128Reg.getValueType();
5656 unsigned WideSize = VT.getVectorNumElements();
5657 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5658 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5659 SDLoc DL(V128Reg);
5660
5661 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5662}
5663
5664// Gather data to see if the operation can be modelled as a
5665// shuffle in combination with VEXTs.
5666SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5667 SelectionDAG &DAG) const {
5668 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5669 LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5670 SDLoc dl(Op);
5671 EVT VT = Op.getValueType();
5672 unsigned NumElts = VT.getVectorNumElements();
5673
5674 struct ShuffleSourceInfo {
5675 SDValue Vec;
5676 unsigned MinElt;
5677 unsigned MaxElt;
5678
5679 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5680 // be compatible with the shuffle we intend to construct. As a result
5681 // ShuffleVec will be some sliding window into the original Vec.
5682 SDValue ShuffleVec;
5683
5684 // Code should guarantee that element i in Vec starts at element "WindowBase
5685 // + i * WindowScale in ShuffleVec".
5686 int WindowBase;
5687 int WindowScale;
5688
5689 ShuffleSourceInfo(SDValue Vec)
5690 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5691 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5692
5693 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5694 };
5695
5696 // First gather all vectors used as an immediate source for this BUILD_VECTOR
5697 // node.
5698 SmallVector<ShuffleSourceInfo, 2> Sources;
5699 for (unsigned i = 0; i < NumElts; ++i) {
5700 SDValue V = Op.getOperand(i);
5701 if (V.isUndef())
5702 continue;
5703 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5704 !isa<ConstantSDNode>(V.getOperand(1))) {
5705 LLVM_DEBUG(
5706 dbgs() << "Reshuffle failed: "
5707 "a shuffle can only come from building a vector from "
5708 "various elements of other vectors, provided their "
5709 "indices are constant\n");
5710 return SDValue();
5711 }
5712
5713 // Add this element source to the list if it's not already there.
5714 SDValue SourceVec = V.getOperand(0);
5715 auto Source = find(Sources, SourceVec);
5716 if (Source == Sources.end())
5717 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5718
5719 // Update the minimum and maximum lane number seen.
5720 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5721 Source->MinElt = std::min(Source->MinElt, EltNo);
5722 Source->MaxElt = std::max(Source->MaxElt, EltNo);
5723 }
5724
5725 if (Sources.size() > 2) {
5726 LLVM_DEBUG(
5727 dbgs() << "Reshuffle failed: currently only do something sane when at "
5728 "most two source vectors are involved\n");
5729 return SDValue();
5730 }
5731
5732 // Find out the smallest element size among result and two sources, and use
5733 // it as element size to build the shuffle_vector.
5734 EVT SmallestEltTy = VT.getVectorElementType();
5735 for (auto &Source : Sources) {
5736 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5737 if (SrcEltTy.bitsLT(SmallestEltTy)) {
5738 SmallestEltTy = SrcEltTy;
5739 }
5740 }
5741 unsigned ResMultiplier =
5742 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5743 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5744 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5745
5746 // If the source vector is too wide or too narrow, we may nevertheless be able
5747 // to construct a compatible shuffle either by concatenating it with UNDEF or
5748 // extracting a suitable range of elements.
5749 for (auto &Src : Sources) {
5750 EVT SrcVT = Src.ShuffleVec.getValueType();
5751
5752 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
5753 continue;
5754
5755 // This stage of the search produces a source with the same element type as
5756 // the original, but with a total width matching the BUILD_VECTOR output.
5757 EVT EltVT = SrcVT.getVectorElementType();
5758 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
5759 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
5760
5761 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
5762 assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
5763 // We can pad out the smaller vector for free, so if it's part of a
5764 // shuffle...
5765 Src.ShuffleVec =
5766 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
5767 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
5768 continue;
5769 }
5770
5771 assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
5772
5773 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
5774 LLVM_DEBUG(
5775 dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
5776 return SDValue();
5777 }
5778
5779 if (Src.MinElt >= NumSrcElts) {
5780 // The extraction can just take the second half
5781 Src.ShuffleVec =
5782 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5783 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5784 Src.WindowBase = -NumSrcElts;
5785 } else if (Src.MaxElt < NumSrcElts) {
5786 // The extraction can just take the first half
5787 Src.ShuffleVec =
5788 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5789 DAG.getConstant(0, dl, MVT::i64));
5790 } else {
5791 // An actual VEXT is needed
5792 SDValue VEXTSrc1 =
5793 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5794 DAG.getConstant(0, dl, MVT::i64));
5795 SDValue VEXTSrc2 =
5796 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5797 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5798 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
5799
5800 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
5801 VEXTSrc2,
5802 DAG.getConstant(Imm, dl, MVT::i32));
5803 Src.WindowBase = -Src.MinElt;
5804 }
5805 }
5806
5807 // Another possible incompatibility occurs from the vector element types. We
5808 // can fix this by bitcasting the source vectors to the same type we intend
5809 // for the shuffle.
5810 for (auto &Src : Sources) {
5811 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
5812 if (SrcEltTy == SmallestEltTy)
5813 continue;
5814 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
5815 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
5816 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
5817 Src.WindowBase *= Src.WindowScale;
5818 }
5819
5820 // Final sanity check before we try to actually produce a shuffle.
5821 LLVM_DEBUG(for (auto Src
5822 : Sources)
5823 assert(Src.ShuffleVec.getValueType() == ShuffleVT););
5824
5825 // The stars all align, our next step is to produce the mask for the shuffle.
5826 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
5827 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
5828 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
5829 SDValue Entry = Op.getOperand(i);
5830 if (Entry.isUndef())
5831 continue;
5832
5833 auto Src = find(Sources, Entry.getOperand(0));
5834 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
5835
5836 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
5837 // trunc. So only the low std::min(SrcBits, DestBits) bits actually get
5838 // defined in this segment.
5839 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
5840 int BitsDefined =
5841 std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
5842 int LanesDefined = BitsDefined / BitsPerShuffleLane;
5843
5844 // This source is expected to fill ResMultiplier lanes of the final shuffle,
5845 // starting at the appropriate offset.
5846 int *LaneMask = &Mask[i * ResMultiplier];
5847
5848 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
5849 ExtractBase += NumElts * (Src - Sources.begin());
5850 for (int j = 0; j < LanesDefined; ++j)
5851 LaneMask[j] = ExtractBase + j;
5852 }
5853
5854 // Final check before we try to produce nonsense...
5855 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
5856 LLVM_DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
5857 return SDValue();
5858 }
5859
5860 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
5861 for (unsigned i = 0; i < Sources.size(); ++i)
5862 ShuffleOps[i] = Sources[i].ShuffleVec;
5863
5864 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
5865 ShuffleOps[1], Mask);
5866 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
5867
5868 LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
5869 dbgs() << "Reshuffle, creating node: "; V.dump(););
5870
5871 return V;
5872}
5873
5874// Check whether an EXT instruction can handle the shuffle mask when the
5875// vector sources of the shuffle are the same.
5876static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5877 unsigned NumElts = VT.getVectorNumElements();
5878
5879 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5880 if (M[0] < 0)
5881 return false;
5882
5883 Imm = M[0];
5884
5885 // If this is a VEXT shuffle, the immediate value is the index of the first
5886 // element. The other shuffle indices must be the successive elements after
5887 // the first one.
5888 unsigned ExpectedElt = Imm;
5889 for (unsigned i = 1; i < NumElts; ++i) {
5890 // Increment the expected index. If it wraps around, just follow it
5891 // back to index zero and keep going.
5892 ++ExpectedElt;
5893 if (ExpectedElt == NumElts)
5894 ExpectedElt = 0;
5895
5896 if (M[i] < 0)
5897 continue; // ignore UNDEF indices
5898 if (ExpectedElt != static_cast<unsigned>(M[i]))
5899 return false;
5900 }
5901
5902 return true;
5903}
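
// A standalone mirror of the check above with a concrete example (illustrative
// only): for a single-source v8i8 shuffle, the rotation mask <3,4,5,6,7,0,1,2>
// is accepted with Imm == 3, whereas a mask such as <3,5,...> is rejected.
static bool isSingletonExtMaskRef(const int *M, unsigned NumElts, unsigned &Imm) {
  if (M[0] < 0)
    return false;
  Imm = M[0];
  unsigned Expected = Imm;
  for (unsigned i = 1; i < NumElts; ++i) {
    if (++Expected == NumElts)
      Expected = 0;                 // wrap back around to lane 0
    if (M[i] >= 0 && static_cast<unsigned>(M[i]) != Expected)
      return false;
  }
  return true;
}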
5904
5905// Check whether an EXT instruction can handle the shuffle mask when the
5906// vector sources of the shuffle are different.
5907static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
5908 unsigned &Imm) {
5909 // Look for the first non-undef element.
5910 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
5911
5912 // Benefit from APInt to handle overflow when calculating the expected element.
5913 unsigned NumElts = VT.getVectorNumElements();
5914 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
5915 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
5916 // The following shuffle indices must be the successive elements after the
5917 // first real element.
5918 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
5919 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
5920 if (FirstWrongElt != M.end())
5921 return false;
5922
5923 // The index of an EXT is the first element if it is not UNDEF.
5924 // Watch out for the beginning UNDEFs. The EXT index should be the expected
5925 // value of the first element. E.g.
5926 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
5927 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
5928 // ExpectedElt is the last mask index plus 1.
5929 Imm = ExpectedElt.getZExtValue();
5930
5931 // There are two different cases that require reversing the input vectors.
5932