Bug Summary

File: lib/Target/AArch64/AArch64ISelLowering.cpp
Warning: line 8202, column 48
The result of the left shift is undefined due to shifting by '64', which is greater or equal to the width of type 'unsigned long long'
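
The message means that a 64-bit left shift can be reached with a shift count of 64; in C++, shifting by an amount greater than or equal to the width of the operand's type is undefined behaviour. The flagged line (8202) lies outside the excerpt reproduced below, so the following is only a minimal sketch of this class of bug and one way to guard against it; maskLowBits and NumBits are hypothetical names, not code from this file.

// Illustrative sketch only; not the code at line 8202.
static unsigned long long maskLowBits(unsigned NumBits) {
  // Undefined behaviour when NumBits == 64: the shift count equals the
  // width of 'unsigned long long', the condition the analyzer reports.
  //   return (1ULL << NumBits) - 1;

  // Guarded form: special-case the full-width count before shifting.
  if (NumBits >= 64)
    return ~0ULL;
  return (1ULL << NumBits) - 1;
}

The usual fix is to clamp or special-case the shift count before the shift executes, so the analyzer can prove the count stays below the operand width.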

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-eagerly-assume -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -mrelocation-model pic -pic-level 2 -mthread-model posix -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -momit-leaf-frame-pointer -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-7/lib/clang/7.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-7~svn325874/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-7~svn325874/build-llvm/include -I /build/llvm-toolchain-snapshot-7~svn325874/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/x86_64-linux-gnu/c++/7.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/7.3.0/../../../../include/c++/7.3.0/backward -internal-isystem /usr/include/clang/7.0.0/include/ -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-7/lib/clang/7.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++11 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-7~svn325874/build-llvm/lib/Target/AArch64 -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-checker optin.performance.Padding -analyzer-output=html -analyzer-config stable-report-filename=true -o /tmp/scan-build-2018-02-23-163436-368-1 -x c++ /build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp
1//===-- AArch64ISelLowering.cpp - AArch64 DAG Lowering Implementation ----===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the AArch64TargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64ISelLowering.h"
15#include "AArch64CallingConvention.h"
16#include "AArch64MachineFunctionInfo.h"
17#include "AArch64PerfectShuffle.h"
18#include "AArch64RegisterInfo.h"
19#include "AArch64Subtarget.h"
20#include "MCTargetDesc/AArch64AddressingModes.h"
21#include "Utils/AArch64BaseInfo.h"
22#include "llvm/ADT/APFloat.h"
23#include "llvm/ADT/APInt.h"
24#include "llvm/ADT/ArrayRef.h"
25#include "llvm/ADT/STLExtras.h"
26#include "llvm/ADT/SmallVector.h"
27#include "llvm/ADT/Statistic.h"
28#include "llvm/ADT/StringRef.h"
29#include "llvm/ADT/StringSwitch.h"
30#include "llvm/ADT/Triple.h"
31#include "llvm/ADT/Twine.h"
32#include "llvm/Analysis/VectorUtils.h"
33#include "llvm/CodeGen/CallingConvLower.h"
34#include "llvm/CodeGen/MachineBasicBlock.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstr.h"
38#include "llvm/CodeGen/MachineInstrBuilder.h"
39#include "llvm/CodeGen/MachineMemOperand.h"
40#include "llvm/CodeGen/MachineRegisterInfo.h"
41#include "llvm/CodeGen/MachineValueType.h"
42#include "llvm/CodeGen/RuntimeLibcalls.h"
43#include "llvm/CodeGen/SelectionDAG.h"
44#include "llvm/CodeGen/SelectionDAGNodes.h"
45#include "llvm/CodeGen/TargetCallingConv.h"
46#include "llvm/CodeGen/TargetInstrInfo.h"
47#include "llvm/CodeGen/ValueTypes.h"
48#include "llvm/IR/Attributes.h"
49#include "llvm/IR/Constants.h"
50#include "llvm/IR/DataLayout.h"
51#include "llvm/IR/DebugLoc.h"
52#include "llvm/IR/DerivedTypes.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/GetElementPtrTypeIterator.h"
55#include "llvm/IR/GlobalValue.h"
56#include "llvm/IR/IRBuilder.h"
57#include "llvm/IR/Instruction.h"
58#include "llvm/IR/Instructions.h"
59#include "llvm/IR/Intrinsics.h"
60#include "llvm/IR/Module.h"
61#include "llvm/IR/OperandTraits.h"
62#include "llvm/IR/Type.h"
63#include "llvm/IR/Use.h"
64#include "llvm/IR/Value.h"
65#include "llvm/MC/MCRegisterInfo.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/CodeGen.h"
68#include "llvm/Support/CommandLine.h"
69#include "llvm/Support/Compiler.h"
70#include "llvm/Support/Debug.h"
71#include "llvm/Support/ErrorHandling.h"
72#include "llvm/Support/KnownBits.h"
73#include "llvm/Support/MathExtras.h"
74#include "llvm/Support/raw_ostream.h"
75#include "llvm/Target/TargetMachine.h"
76#include "llvm/Target/TargetOptions.h"
77#include <algorithm>
78#include <bitset>
79#include <cassert>
80#include <cctype>
81#include <cstdint>
82#include <cstdlib>
83#include <iterator>
84#include <limits>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90
91#define DEBUG_TYPE "aarch64-lower"
92
93STATISTIC(NumTailCalls, "Number of tail calls");
94STATISTIC(NumShiftInserts, "Number of vector shift inserts");
95STATISTIC(NumOptimizedImms, "Number of times immediates were optimized");
96
97static cl::opt<bool>
98EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden,
99 cl::desc("Allow AArch64 SLI/SRI formation"),
100 cl::init(false));
101
102// FIXME: The necessary dtprel relocations don't seem to be supported
103// well in the GNU bfd and gold linkers at the moment. Therefore, by
104// default, for now, fall back to GeneralDynamic code generation.
105cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
106 "aarch64-elf-ldtls-generation", cl::Hidden,
107 cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
108 cl::init(false));
109
110static cl::opt<bool>
111EnableOptimizeLogicalImm("aarch64-enable-logical-imm", cl::Hidden,
112 cl::desc("Enable AArch64 logical imm instruction "
113 "optimization"),
114 cl::init(true));
115
116/// Value type used for condition codes.
117static const MVT MVT_CC = MVT::i32;
118
119AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
120 const AArch64Subtarget &STI)
121 : TargetLowering(TM), Subtarget(&STI) {
122 // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so
123 // we have to make something up. Arbitrarily, choose ZeroOrOne.
124 setBooleanContents(ZeroOrOneBooleanContent);
125 // When comparing vectors the result sets the different elements in the
126 // vector to all-one or all-zero.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // Set up the register classes.
130 addRegisterClass(MVT::i32, &AArch64::GPR32allRegClass);
131 addRegisterClass(MVT::i64, &AArch64::GPR64allRegClass);
132
133 if (Subtarget->hasFPARMv8()) {
134 addRegisterClass(MVT::f16, &AArch64::FPR16RegClass);
135 addRegisterClass(MVT::f32, &AArch64::FPR32RegClass);
136 addRegisterClass(MVT::f64, &AArch64::FPR64RegClass);
137 addRegisterClass(MVT::f128, &AArch64::FPR128RegClass);
138 }
139
140 if (Subtarget->hasNEON()) {
141 addRegisterClass(MVT::v16i8, &AArch64::FPR8RegClass);
142 addRegisterClass(MVT::v8i16, &AArch64::FPR16RegClass);
143 // Someone set us up the NEON.
144 addDRTypeForNEON(MVT::v2f32);
145 addDRTypeForNEON(MVT::v8i8);
146 addDRTypeForNEON(MVT::v4i16);
147 addDRTypeForNEON(MVT::v2i32);
148 addDRTypeForNEON(MVT::v1i64);
149 addDRTypeForNEON(MVT::v1f64);
150 addDRTypeForNEON(MVT::v4f16);
151
152 addQRTypeForNEON(MVT::v4f32);
153 addQRTypeForNEON(MVT::v2f64);
154 addQRTypeForNEON(MVT::v16i8);
155 addQRTypeForNEON(MVT::v8i16);
156 addQRTypeForNEON(MVT::v4i32);
157 addQRTypeForNEON(MVT::v2i64);
158 addQRTypeForNEON(MVT::v8f16);
159 }
160
161 // Compute derived properties from the register classes
162 computeRegisterProperties(Subtarget->getRegisterInfo());
163
164 // Provide all sorts of operation actions
165 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
166 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
167 setOperationAction(ISD::SETCC, MVT::i32, Custom);
168 setOperationAction(ISD::SETCC, MVT::i64, Custom);
169 setOperationAction(ISD::SETCC, MVT::f16, Custom);
170 setOperationAction(ISD::SETCC, MVT::f32, Custom);
171 setOperationAction(ISD::SETCC, MVT::f64, Custom);
172 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
173 setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
174 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
175 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
176 setOperationAction(ISD::BR_CC, MVT::i64, Custom);
177 setOperationAction(ISD::BR_CC, MVT::f16, Custom);
178 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
179 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
180 setOperationAction(ISD::SELECT, MVT::i32, Custom);
181 setOperationAction(ISD::SELECT, MVT::i64, Custom);
182 setOperationAction(ISD::SELECT, MVT::f16, Custom);
183 setOperationAction(ISD::SELECT, MVT::f32, Custom);
184 setOperationAction(ISD::SELECT, MVT::f64, Custom);
185 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
186 setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
187 setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
188 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
189 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
190 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
191 setOperationAction(ISD::JumpTable, MVT::i64, Custom);
192
193 setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
194 setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
195 setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
196
197 setOperationAction(ISD::FREM, MVT::f32, Expand);
198 setOperationAction(ISD::FREM, MVT::f64, Expand);
199 setOperationAction(ISD::FREM, MVT::f80, Expand);
200
201 // Custom lowering hooks are needed for XOR
202 // to fold it into CSINC/CSINV.
203 setOperationAction(ISD::XOR, MVT::i32, Custom);
204 setOperationAction(ISD::XOR, MVT::i64, Custom);
205
206 // Virtually no operation on f128 is legal, but LLVM can't expand them when
207 // there's a valid register class, so we need custom operations in most cases.
208 setOperationAction(ISD::FABS, MVT::f128, Expand);
209 setOperationAction(ISD::FADD, MVT::f128, Custom);
210 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Expand);
211 setOperationAction(ISD::FCOS, MVT::f128, Expand);
212 setOperationAction(ISD::FDIV, MVT::f128, Custom);
213 setOperationAction(ISD::FMA, MVT::f128, Expand);
214 setOperationAction(ISD::FMUL, MVT::f128, Custom);
215 setOperationAction(ISD::FNEG, MVT::f128, Expand);
216 setOperationAction(ISD::FPOW, MVT::f128, Expand);
217 setOperationAction(ISD::FREM, MVT::f128, Expand);
218 setOperationAction(ISD::FRINT, MVT::f128, Expand);
219 setOperationAction(ISD::FSIN, MVT::f128, Expand);
220 setOperationAction(ISD::FSINCOS, MVT::f128, Expand);
221 setOperationAction(ISD::FSQRT, MVT::f128, Expand);
222 setOperationAction(ISD::FSUB, MVT::f128, Custom);
223 setOperationAction(ISD::FTRUNC, MVT::f128, Expand);
224 setOperationAction(ISD::SETCC, MVT::f128, Custom);
225 setOperationAction(ISD::BR_CC, MVT::f128, Custom);
226 setOperationAction(ISD::SELECT, MVT::f128, Custom);
227 setOperationAction(ISD::SELECT_CC, MVT::f128, Custom);
228 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
229
230 // Lowering for many of the conversions is actually specified by the non-f128
231 // type. The LowerXXX function will be trivial when f128 isn't involved.
232 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
233 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
234 setOperationAction(ISD::FP_TO_SINT, MVT::i128, Custom);
235 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
236 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
237 setOperationAction(ISD::FP_TO_UINT, MVT::i128, Custom);
238 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
239 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
240 setOperationAction(ISD::SINT_TO_FP, MVT::i128, Custom);
241 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
242 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
243 setOperationAction(ISD::UINT_TO_FP, MVT::i128, Custom);
244 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
245 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
246
247 // Variable arguments.
248 setOperationAction(ISD::VASTART, MVT::Other, Custom);
249 setOperationAction(ISD::VAARG, MVT::Other, Custom);
250 setOperationAction(ISD::VACOPY, MVT::Other, Custom);
251 setOperationAction(ISD::VAEND, MVT::Other, Expand);
252
253 // Variable-sized objects.
254 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
255 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
256
257 if (Subtarget->isTargetWindows())
258 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
259 else
260 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);
261
262 // Constant pool entries
263 setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
264
265 // BlockAddress
266 setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
267
268 // Add/Sub overflow ops with MVT::Glues are lowered to NZCV dependences.
269 setOperationAction(ISD::ADDC, MVT::i32, Custom);
270 setOperationAction(ISD::ADDE, MVT::i32, Custom);
271 setOperationAction(ISD::SUBC, MVT::i32, Custom);
272 setOperationAction(ISD::SUBE, MVT::i32, Custom);
273 setOperationAction(ISD::ADDC, MVT::i64, Custom);
274 setOperationAction(ISD::ADDE, MVT::i64, Custom);
275 setOperationAction(ISD::SUBC, MVT::i64, Custom);
276 setOperationAction(ISD::SUBE, MVT::i64, Custom);
277
278 // AArch64 lacks both left-rotate and popcount instructions.
279 setOperationAction(ISD::ROTL, MVT::i32, Expand);
280 setOperationAction(ISD::ROTL, MVT::i64, Expand);
281 for (MVT VT : MVT::vector_valuetypes()) {
282 setOperationAction(ISD::ROTL, VT, Expand);
283 setOperationAction(ISD::ROTR, VT, Expand);
284 }
285
286 // AArch64 doesn't have {U|S}MUL_LOHI.
287 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
288 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
289
290 setOperationAction(ISD::CTPOP, MVT::i32, Custom);
291 setOperationAction(ISD::CTPOP, MVT::i64, Custom);
292
293 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
294 setOperationAction(ISD::SDIVREM, MVT::i64, Expand);
295 for (MVT VT : MVT::vector_valuetypes()) {
296 setOperationAction(ISD::SDIVREM, VT, Expand);
297 setOperationAction(ISD::UDIVREM, VT, Expand);
298 }
299 setOperationAction(ISD::SREM, MVT::i32, Expand);
300 setOperationAction(ISD::SREM, MVT::i64, Expand);
301 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
302 setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
303 setOperationAction(ISD::UREM, MVT::i32, Expand);
304 setOperationAction(ISD::UREM, MVT::i64, Expand);
305
306 // Custom lower Add/Sub/Mul with overflow.
307 setOperationAction(ISD::SADDO, MVT::i32, Custom);
308 setOperationAction(ISD::SADDO, MVT::i64, Custom);
309 setOperationAction(ISD::UADDO, MVT::i32, Custom);
310 setOperationAction(ISD::UADDO, MVT::i64, Custom);
311 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
312 setOperationAction(ISD::SSUBO, MVT::i64, Custom);
313 setOperationAction(ISD::USUBO, MVT::i32, Custom);
314 setOperationAction(ISD::USUBO, MVT::i64, Custom);
315 setOperationAction(ISD::SMULO, MVT::i32, Custom);
316 setOperationAction(ISD::SMULO, MVT::i64, Custom);
317 setOperationAction(ISD::UMULO, MVT::i32, Custom);
318 setOperationAction(ISD::UMULO, MVT::i64, Custom);
319
320 setOperationAction(ISD::FSIN, MVT::f32, Expand);
321 setOperationAction(ISD::FSIN, MVT::f64, Expand);
322 setOperationAction(ISD::FCOS, MVT::f32, Expand);
323 setOperationAction(ISD::FCOS, MVT::f64, Expand);
324 setOperationAction(ISD::FPOW, MVT::f32, Expand);
325 setOperationAction(ISD::FPOW, MVT::f64, Expand);
326 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
327 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
328 if (Subtarget->hasFullFP16())
329 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Custom);
330 else
331 setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
332
333 setOperationAction(ISD::FREM, MVT::f16, Promote);
334 setOperationAction(ISD::FREM, MVT::v4f16, Promote);
335 setOperationAction(ISD::FREM, MVT::v8f16, Promote);
336 setOperationAction(ISD::FPOW, MVT::f16, Promote);
337 setOperationAction(ISD::FPOW, MVT::v4f16, Promote);
338 setOperationAction(ISD::FPOW, MVT::v8f16, Promote);
339 setOperationAction(ISD::FPOWI, MVT::f16, Promote);
340 setOperationAction(ISD::FCOS, MVT::f16, Promote);
341 setOperationAction(ISD::FCOS, MVT::v4f16, Promote);
342 setOperationAction(ISD::FCOS, MVT::v8f16, Promote);
343 setOperationAction(ISD::FSIN, MVT::f16, Promote);
344 setOperationAction(ISD::FSIN, MVT::v4f16, Promote);
345 setOperationAction(ISD::FSIN, MVT::v8f16, Promote);
346 setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
347 setOperationAction(ISD::FSINCOS, MVT::v4f16, Promote);
348 setOperationAction(ISD::FSINCOS, MVT::v8f16, Promote);
349 setOperationAction(ISD::FEXP, MVT::f16, Promote);
350 setOperationAction(ISD::FEXP, MVT::v4f16, Promote);
351 setOperationAction(ISD::FEXP, MVT::v8f16, Promote);
352 setOperationAction(ISD::FEXP2, MVT::f16, Promote);
353 setOperationAction(ISD::FEXP2, MVT::v4f16, Promote);
354 setOperationAction(ISD::FEXP2, MVT::v8f16, Promote);
355 setOperationAction(ISD::FLOG, MVT::f16, Promote);
356 setOperationAction(ISD::FLOG, MVT::v4f16, Promote);
357 setOperationAction(ISD::FLOG, MVT::v8f16, Promote);
358 setOperationAction(ISD::FLOG2, MVT::f16, Promote);
359 setOperationAction(ISD::FLOG2, MVT::v4f16, Promote);
360 setOperationAction(ISD::FLOG2, MVT::v8f16, Promote);
361 setOperationAction(ISD::FLOG10, MVT::f16, Promote);
362 setOperationAction(ISD::FLOG10, MVT::v4f16, Promote);
363 setOperationAction(ISD::FLOG10, MVT::v8f16, Promote);
364
365 if (!Subtarget->hasFullFP16()) {
366 setOperationAction(ISD::SELECT, MVT::f16, Promote);
367 setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
368 setOperationAction(ISD::SETCC, MVT::f16, Promote);
369 setOperationAction(ISD::BR_CC, MVT::f16, Promote);
370 setOperationAction(ISD::FADD, MVT::f16, Promote);
371 setOperationAction(ISD::FSUB, MVT::f16, Promote);
372 setOperationAction(ISD::FMUL, MVT::f16, Promote);
373 setOperationAction(ISD::FDIV, MVT::f16, Promote);
374 setOperationAction(ISD::FMA, MVT::f16, Promote);
375 setOperationAction(ISD::FNEG, MVT::f16, Promote);
376 setOperationAction(ISD::FABS, MVT::f16, Promote);
377 setOperationAction(ISD::FCEIL, MVT::f16, Promote);
378 setOperationAction(ISD::FSQRT, MVT::f16, Promote);
379 setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
380 setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
381 setOperationAction(ISD::FRINT, MVT::f16, Promote);
382 setOperationAction(ISD::FROUND, MVT::f16, Promote);
383 setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
384 setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
385 setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
386 setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
387 setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
388
389 // promote v4f16 to v4f32 when that is known to be safe.
390 setOperationAction(ISD::FADD, MVT::v4f16, Promote);
391 setOperationAction(ISD::FSUB, MVT::v4f16, Promote);
392 setOperationAction(ISD::FMUL, MVT::v4f16, Promote);
393 setOperationAction(ISD::FDIV, MVT::v4f16, Promote);
394 setOperationAction(ISD::FP_EXTEND, MVT::v4f16, Promote);
395 setOperationAction(ISD::FP_ROUND, MVT::v4f16, Promote);
396 AddPromotedToType(ISD::FADD, MVT::v4f16, MVT::v4f32);
397 AddPromotedToType(ISD::FSUB, MVT::v4f16, MVT::v4f32);
398 AddPromotedToType(ISD::FMUL, MVT::v4f16, MVT::v4f32);
399 AddPromotedToType(ISD::FDIV, MVT::v4f16, MVT::v4f32);
400 AddPromotedToType(ISD::FP_EXTEND, MVT::v4f16, MVT::v4f32);
401 AddPromotedToType(ISD::FP_ROUND, MVT::v4f16, MVT::v4f32);
402
403 setOperationAction(ISD::FABS, MVT::v4f16, Expand);
404 setOperationAction(ISD::FNEG, MVT::v4f16, Expand);
405 setOperationAction(ISD::FROUND, MVT::v4f16, Expand);
406 setOperationAction(ISD::FMA, MVT::v4f16, Expand);
407 setOperationAction(ISD::SETCC, MVT::v4f16, Expand);
408 setOperationAction(ISD::BR_CC, MVT::v4f16, Expand);
409 setOperationAction(ISD::SELECT, MVT::v4f16, Expand);
410 setOperationAction(ISD::SELECT_CC, MVT::v4f16, Expand);
411 setOperationAction(ISD::FTRUNC, MVT::v4f16, Expand);
412 setOperationAction(ISD::FCOPYSIGN, MVT::v4f16, Expand);
413 setOperationAction(ISD::FFLOOR, MVT::v4f16, Expand);
414 setOperationAction(ISD::FCEIL, MVT::v4f16, Expand);
415 setOperationAction(ISD::FRINT, MVT::v4f16, Expand);
416 setOperationAction(ISD::FNEARBYINT, MVT::v4f16, Expand);
417 setOperationAction(ISD::FSQRT, MVT::v4f16, Expand);
418
419 setOperationAction(ISD::FABS, MVT::v8f16, Expand);
420 setOperationAction(ISD::FADD, MVT::v8f16, Expand);
421 setOperationAction(ISD::FCEIL, MVT::v8f16, Expand);
422 setOperationAction(ISD::FCOPYSIGN, MVT::v8f16, Expand);
423 setOperationAction(ISD::FDIV, MVT::v8f16, Expand);
424 setOperationAction(ISD::FFLOOR, MVT::v8f16, Expand);
425 setOperationAction(ISD::FMA, MVT::v8f16, Expand);
426 setOperationAction(ISD::FMUL, MVT::v8f16, Expand);
427 setOperationAction(ISD::FNEARBYINT, MVT::v8f16, Expand);
428 setOperationAction(ISD::FNEG, MVT::v8f16, Expand);
429 setOperationAction(ISD::FROUND, MVT::v8f16, Expand);
430 setOperationAction(ISD::FRINT, MVT::v8f16, Expand);
431 setOperationAction(ISD::FSQRT, MVT::v8f16, Expand);
432 setOperationAction(ISD::FSUB, MVT::v8f16, Expand);
433 setOperationAction(ISD::FTRUNC, MVT::v8f16, Expand);
434 setOperationAction(ISD::SETCC, MVT::v8f16, Expand);
435 setOperationAction(ISD::BR_CC, MVT::v8f16, Expand);
436 setOperationAction(ISD::SELECT, MVT::v8f16, Expand);
437 setOperationAction(ISD::SELECT_CC, MVT::v8f16, Expand);
438 setOperationAction(ISD::FP_EXTEND, MVT::v8f16, Expand);
439 }
440
441 // AArch64 has implementations of a lot of rounding-like FP operations.
442 for (MVT Ty : {MVT::f32, MVT::f64}) {
443 setOperationAction(ISD::FFLOOR, Ty, Legal);
444 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
445 setOperationAction(ISD::FCEIL, Ty, Legal);
446 setOperationAction(ISD::FRINT, Ty, Legal);
447 setOperationAction(ISD::FTRUNC, Ty, Legal);
448 setOperationAction(ISD::FROUND, Ty, Legal);
449 setOperationAction(ISD::FMINNUM, Ty, Legal);
450 setOperationAction(ISD::FMAXNUM, Ty, Legal);
451 setOperationAction(ISD::FMINNAN, Ty, Legal);
452 setOperationAction(ISD::FMAXNAN, Ty, Legal);
453 }
454
455 if (Subtarget->hasFullFP16()) {
456 setOperationAction(ISD::FNEARBYINT, MVT::f16, Legal);
457 setOperationAction(ISD::FFLOOR, MVT::f16, Legal);
458 setOperationAction(ISD::FCEIL, MVT::f16, Legal);
459 setOperationAction(ISD::FRINT, MVT::f16, Legal);
460 setOperationAction(ISD::FTRUNC, MVT::f16, Legal);
461 setOperationAction(ISD::FROUND, MVT::f16, Legal);
462 setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
463 setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
464 setOperationAction(ISD::FMINNAN, MVT::f16, Legal);
465 setOperationAction(ISD::FMAXNAN, MVT::f16, Legal);
466 }
467
468 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
469
470 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
471 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Custom);
472 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
473 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Custom);
474 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
475
476 // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
477 // This requires the Performance Monitors extension.
478 if (Subtarget->hasPerfMon())
479 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
480
481 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
482 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
483 // Issue __sincos_stret if available.
484 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
485 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
486 } else {
487 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
488 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
489 }
490
491 // Make floating-point constants legal for the large code model, so they don't
492 // become loads from the constant pool.
493 if (Subtarget->isTargetMachO() && TM.getCodeModel() == CodeModel::Large) {
494 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
495 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
496 }
497
498 // AArch64 does not have floating-point extending loads, i1 sign-extending
499 // load, floating-point truncating stores, or v2i32->v2i16 truncating store.
500 for (MVT VT : MVT::fp_valuetypes()) {
501 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
502 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
503 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
504 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f80, Expand);
505 }
506 for (MVT VT : MVT::integer_valuetypes())
507 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Expand);
508
509 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
510 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
511 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
512 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
513 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
514 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
515 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
516
517 setOperationAction(ISD::BITCAST, MVT::i16, Custom);
518 setOperationAction(ISD::BITCAST, MVT::f16, Custom);
519
520 // Indexed loads and stores are supported.
521 for (unsigned im = (unsigned)ISD::PRE_INC;
522 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
523 setIndexedLoadAction(im, MVT::i8, Legal);
524 setIndexedLoadAction(im, MVT::i16, Legal);
525 setIndexedLoadAction(im, MVT::i32, Legal);
526 setIndexedLoadAction(im, MVT::i64, Legal);
527 setIndexedLoadAction(im, MVT::f64, Legal);
528 setIndexedLoadAction(im, MVT::f32, Legal);
529 setIndexedLoadAction(im, MVT::f16, Legal);
530 setIndexedStoreAction(im, MVT::i8, Legal);
531 setIndexedStoreAction(im, MVT::i16, Legal);
532 setIndexedStoreAction(im, MVT::i32, Legal);
533 setIndexedStoreAction(im, MVT::i64, Legal);
534 setIndexedStoreAction(im, MVT::f64, Legal);
535 setIndexedStoreAction(im, MVT::f32, Legal);
536 setIndexedStoreAction(im, MVT::f16, Legal);
537 }
538
539 // Trap.
540 setOperationAction(ISD::TRAP, MVT::Other, Legal);
541
542 // We combine OR nodes for bitfield operations.
543 setTargetDAGCombine(ISD::OR);
544
545 // Vector add and sub nodes may conceal a high-half opportunity.
546 // Also, try to fold ADD into CSINC/CSINV.
547 setTargetDAGCombine(ISD::ADD);
548 setTargetDAGCombine(ISD::SUB);
549 setTargetDAGCombine(ISD::SRL);
550 setTargetDAGCombine(ISD::XOR);
551 setTargetDAGCombine(ISD::SINT_TO_FP);
552 setTargetDAGCombine(ISD::UINT_TO_FP);
553
554 setTargetDAGCombine(ISD::FP_TO_SINT);
555 setTargetDAGCombine(ISD::FP_TO_UINT);
556 setTargetDAGCombine(ISD::FDIV);
557
558 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
559
560 setTargetDAGCombine(ISD::ANY_EXTEND);
561 setTargetDAGCombine(ISD::ZERO_EXTEND);
562 setTargetDAGCombine(ISD::SIGN_EXTEND);
563 setTargetDAGCombine(ISD::BITCAST);
564 setTargetDAGCombine(ISD::CONCAT_VECTORS);
565 setTargetDAGCombine(ISD::STORE);
566 if (Subtarget->supportsAddressTopByteIgnored())
567 setTargetDAGCombine(ISD::LOAD);
568
569 setTargetDAGCombine(ISD::MUL);
570
571 setTargetDAGCombine(ISD::SELECT);
572 setTargetDAGCombine(ISD::VSELECT);
573
574 setTargetDAGCombine(ISD::INTRINSIC_VOID);
575 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
576 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
577
578 MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
579 MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
580 MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
581
582 setStackPointerRegisterToSaveRestore(AArch64::SP);
583
584 setSchedulingPreference(Sched::Hybrid);
585
586 EnableExtLdPromotion = true;
587
588 // Set required alignment.
589 setMinFunctionAlignment(2);
590 // Set preferred alignments.
591 setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
592 setPrefLoopAlignment(STI.getPrefLoopAlignment());
593
594 // Only change the limit for entries in a jump table if specified by
595 // the subtarget, but not at the command line.
596 unsigned MaxJT = STI.getMaximumJumpTableSize();
597 if (MaxJT && getMaximumJumpTableSize() == 0)
598 setMaximumJumpTableSize(MaxJT);
599
600 setHasExtractBitsInsn(true);
601
602 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
603
604 if (Subtarget->hasNEON()) {
605 // FIXME: v1f64 shouldn't be legal if we can avoid it, because it leads to
606 // silliness like this:
607 setOperationAction(ISD::FABS, MVT::v1f64, Expand);
608 setOperationAction(ISD::FADD, MVT::v1f64, Expand);
609 setOperationAction(ISD::FCEIL, MVT::v1f64, Expand);
610 setOperationAction(ISD::FCOPYSIGN, MVT::v1f64, Expand);
611 setOperationAction(ISD::FCOS, MVT::v1f64, Expand);
612 setOperationAction(ISD::FDIV, MVT::v1f64, Expand);
613 setOperationAction(ISD::FFLOOR, MVT::v1f64, Expand);
614 setOperationAction(ISD::FMA, MVT::v1f64, Expand);
615 setOperationAction(ISD::FMUL, MVT::v1f64, Expand);
616 setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Expand);
617 setOperationAction(ISD::FNEG, MVT::v1f64, Expand);
618 setOperationAction(ISD::FPOW, MVT::v1f64, Expand);
619 setOperationAction(ISD::FREM, MVT::v1f64, Expand);
620 setOperationAction(ISD::FROUND, MVT::v1f64, Expand);
621 setOperationAction(ISD::FRINT, MVT::v1f64, Expand);
622 setOperationAction(ISD::FSIN, MVT::v1f64, Expand);
623 setOperationAction(ISD::FSINCOS, MVT::v1f64, Expand);
624 setOperationAction(ISD::FSQRT, MVT::v1f64, Expand);
625 setOperationAction(ISD::FSUB, MVT::v1f64, Expand);
626 setOperationAction(ISD::FTRUNC, MVT::v1f64, Expand);
627 setOperationAction(ISD::SETCC, MVT::v1f64, Expand);
628 setOperationAction(ISD::BR_CC, MVT::v1f64, Expand);
629 setOperationAction(ISD::SELECT, MVT::v1f64, Expand);
630 setOperationAction(ISD::SELECT_CC, MVT::v1f64, Expand);
631 setOperationAction(ISD::FP_EXTEND, MVT::v1f64, Expand);
632
633 setOperationAction(ISD::FP_TO_SINT, MVT::v1i64, Expand);
634 setOperationAction(ISD::FP_TO_UINT, MVT::v1i64, Expand);
635 setOperationAction(ISD::SINT_TO_FP, MVT::v1i64, Expand);
636 setOperationAction(ISD::UINT_TO_FP, MVT::v1i64, Expand);
637 setOperationAction(ISD::FP_ROUND, MVT::v1f64, Expand);
638
639 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
640
641 // AArch64 doesn't have direct vector ->f32 conversion instructions for
642 // elements smaller than i32, so promote the input to i32 first.
643 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i8, MVT::v4i32);
644 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i8, MVT::v4i32);
645 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v4i16, MVT::v4i32);
646 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v4i16, MVT::v4i32);
647 // i8 and i16 vector elements also need promotion to i32 for v8i8 or v8i16
648 // -> v8f16 conversions.
649 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i8, MVT::v8i32);
650 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i8, MVT::v8i32);
651 setOperationPromotedToType(ISD::SINT_TO_FP, MVT::v8i16, MVT::v8i32);
652 setOperationPromotedToType(ISD::UINT_TO_FP, MVT::v8i16, MVT::v8i32);
653 // Similarly, there is no direct i32 -> f64 vector conversion instruction.
654 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
655 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
656 setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Custom);
657 setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Custom);
658 // Or, direct i32 -> f16 vector conversion. Set it to Custom, so the
659 // conversion happens in two steps: v4i32 -> v4f32 -> v4f16
660 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Custom);
661 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
662
663 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
664 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
665
666 setOperationAction(ISD::CTTZ, MVT::v2i8, Expand);
667 setOperationAction(ISD::CTTZ, MVT::v4i16, Expand);
668 setOperationAction(ISD::CTTZ, MVT::v2i32, Expand);
669 setOperationAction(ISD::CTTZ, MVT::v1i64, Expand);
670 setOperationAction(ISD::CTTZ, MVT::v16i8, Expand);
671 setOperationAction(ISD::CTTZ, MVT::v8i16, Expand);
672 setOperationAction(ISD::CTTZ, MVT::v4i32, Expand);
673 setOperationAction(ISD::CTTZ, MVT::v2i64, Expand);
674
675 // AArch64 doesn't have MUL.2d:
676 setOperationAction(ISD::MUL, MVT::v2i64, Expand);
677 // Custom handling for some quad-vector types to detect MULL.
678 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
679 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
680 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
681
682 // Vector reductions
683 for (MVT VT : MVT::integer_valuetypes()) {
684 setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
685 setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
686 setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
687 setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
688 setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
689 }
690 for (MVT VT : MVT::fp_valuetypes()) {
691 setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
692 setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
693 }
694
695 setOperationAction(ISD::ANY_EXTEND, MVT::v4i32, Legal);
696 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
697 // Likewise, narrowing and extending vector loads/stores aren't handled
698 // directly.
699 for (MVT VT : MVT::vector_valuetypes()) {
700 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
701
702 setOperationAction(ISD::MULHS, VT, Expand);
703 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
704 setOperationAction(ISD::MULHU, VT, Expand);
705 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
706
707 setOperationAction(ISD::BSWAP, VT, Expand);
708
709 for (MVT InnerVT : MVT::vector_valuetypes()) {
710 setTruncStoreAction(VT, InnerVT, Expand);
711 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
712 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
713 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
714 }
715 }
716
717 // AArch64 has implementations of a lot of rounding-like FP operations.
718 for (MVT Ty : {MVT::v2f32, MVT::v4f32, MVT::v2f64}) {
719 setOperationAction(ISD::FFLOOR, Ty, Legal);
720 setOperationAction(ISD::FNEARBYINT, Ty, Legal);
721 setOperationAction(ISD::FCEIL, Ty, Legal);
722 setOperationAction(ISD::FRINT, Ty, Legal);
723 setOperationAction(ISD::FTRUNC, Ty, Legal);
724 setOperationAction(ISD::FROUND, Ty, Legal);
725 }
726 }
727
728 PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
729}
730
731void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
732 assert(VT.isVector() && "VT should be a vector type");
733
734 if (VT.isFloatingPoint()) {
735 MVT PromoteTo = EVT(VT).changeVectorElementTypeToInteger().getSimpleVT();
736 setOperationPromotedToType(ISD::LOAD, VT, PromoteTo);
737 setOperationPromotedToType(ISD::STORE, VT, PromoteTo);
738 }
739
740 // Mark vector float intrinsics as expand.
741 if (VT == MVT::v2f32 || VT == MVT::v4f32 || VT == MVT::v2f64) {
742 setOperationAction(ISD::FSIN, VT, Expand);
743 setOperationAction(ISD::FCOS, VT, Expand);
744 setOperationAction(ISD::FPOW, VT, Expand);
745 setOperationAction(ISD::FLOG, VT, Expand);
746 setOperationAction(ISD::FLOG2, VT, Expand);
747 setOperationAction(ISD::FLOG10, VT, Expand);
748 setOperationAction(ISD::FEXP, VT, Expand);
749 setOperationAction(ISD::FEXP2, VT, Expand);
750
751 // But we do support custom-lowering for FCOPYSIGN.
752 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
753 }
754
755 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
756 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
757 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
758 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
759 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
760 setOperationAction(ISD::SRA, VT, Custom);
761 setOperationAction(ISD::SRL, VT, Custom);
762 setOperationAction(ISD::SHL, VT, Custom);
763 setOperationAction(ISD::AND, VT, Custom);
764 setOperationAction(ISD::OR, VT, Custom);
765 setOperationAction(ISD::SETCC, VT, Custom);
766 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
767
768 setOperationAction(ISD::SELECT, VT, Expand);
769 setOperationAction(ISD::SELECT_CC, VT, Expand);
770 setOperationAction(ISD::VSELECT, VT, Expand);
771 for (MVT InnerVT : MVT::all_valuetypes())
772 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
773
774 // CNT supports only B element sizes.
775 if (VT != MVT::v8i8 && VT != MVT::v16i8)
776 setOperationAction(ISD::CTPOP, VT, Expand);
777
778 setOperationAction(ISD::UDIV, VT, Expand);
779 setOperationAction(ISD::SDIV, VT, Expand);
780 setOperationAction(ISD::UREM, VT, Expand);
781 setOperationAction(ISD::SREM, VT, Expand);
782 setOperationAction(ISD::FREM, VT, Expand);
783
784 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
785 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
786
787 if (!VT.isFloatingPoint())
788 setOperationAction(ISD::ABS, VT, Legal);
789
790 // [SU][MIN|MAX] are available for all NEON types apart from i64.
791 if (!VT.isFloatingPoint() && VT != MVT::v2i64 && VT != MVT::v1i64)
792 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
793 setOperationAction(Opcode, VT, Legal);
794
795 // F[MIN|MAX][NUM|NAN] are available for all FP NEON types.
796 if (VT.isFloatingPoint() &&
797 (VT.getVectorElementType() != MVT::f16 || Subtarget->hasFullFP16()))
798 for (unsigned Opcode : {ISD::FMINNAN, ISD::FMAXNAN,
799 ISD::FMINNUM, ISD::FMAXNUM})
800 setOperationAction(Opcode, VT, Legal);
801
802 if (Subtarget->isLittleEndian()) {
803 for (unsigned im = (unsigned)ISD::PRE_INC;
804 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
805 setIndexedLoadAction(im, VT, Legal);
806 setIndexedStoreAction(im, VT, Legal);
807 }
808 }
809}
810
811void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
812 addRegisterClass(VT, &AArch64::FPR64RegClass);
813 addTypeForNEON(VT, MVT::v2i32);
814}
815
816void AArch64TargetLowering::addQRTypeForNEON(MVT VT) {
817 addRegisterClass(VT, &AArch64::FPR128RegClass);
818 addTypeForNEON(VT, MVT::v4i32);
819}
820
821EVT AArch64TargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
822 EVT VT) const {
823 if (!VT.isVector())
824 return MVT::i32;
825 return VT.changeVectorElementTypeToInteger();
826}
827
828static bool optimizeLogicalImm(SDValue Op, unsigned Size, uint64_t Imm,
829 const APInt &Demanded,
830 TargetLowering::TargetLoweringOpt &TLO,
831 unsigned NewOpc) {
832 uint64_t OldImm = Imm, NewImm, Enc;
833 uint64_t Mask = ((uint64_t)(-1LL) >> (64 - Size)), OrigMask = Mask;
834
835 // Return if the immediate is already all zeros, all ones, a bimm32 or a
836 // bimm64.
837 if (Imm == 0 || Imm == Mask ||
838 AArch64_AM::isLogicalImmediate(Imm & Mask, Size))
839 return false;
840
841 unsigned EltSize = Size;
842 uint64_t DemandedBits = Demanded.getZExtValue();
843
844 // Clear bits that are not demanded.
845 Imm &= DemandedBits;
846
847 while (true) {
848 // The goal here is to set the non-demanded bits in a way that minimizes
849 // the number of switching between 0 and 1. In order to achieve this goal,
850 // we set the non-demanded bits to the value of the preceding demanded bits.
851 // For example, if we have an immediate 0bx10xx0x1 ('x' indicates a
852 // non-demanded bit), we copy bit0 (1) to the least significant 'x',
853 // bit2 (0) to 'xx', and bit6 (1) to the most significant 'x'.
854 // The final result is 0b11000011.
855 uint64_t NonDemandedBits = ~DemandedBits;
856 uint64_t InvertedImm = ~Imm & DemandedBits;
857 uint64_t RotatedImm =
858 ((InvertedImm << 1) | (InvertedImm >> (EltSize - 1) & 1)) &
859 NonDemandedBits;
860 uint64_t Sum = RotatedImm + NonDemandedBits;
861 bool Carry = NonDemandedBits & ~Sum & (1ULL << (EltSize - 1));
862 uint64_t Ones = (Sum + Carry) & NonDemandedBits;
863 NewImm = (Imm | Ones) & Mask;
864
865 // If NewImm or its bitwise NOT is a shifted mask, it is a bitmask immediate
866 // or all-ones or all-zeros, in which case we can stop searching. Otherwise,
867 // we halve the element size and continue the search.
868 if (isShiftedMask_64(NewImm) || isShiftedMask_64(~(NewImm | ~Mask)))
869 break;
870
871 // We cannot shrink the element size any further if it is 2-bits.
872 if (EltSize == 2)
873 return false;
874
875 EltSize /= 2;
876 Mask >>= EltSize;
877 uint64_t Hi = Imm >> EltSize, DemandedBitsHi = DemandedBits >> EltSize;
878
879 // Return if there is mismatch in any of the demanded bits of Imm and Hi.
880 if (((Imm ^ Hi) & (DemandedBits & DemandedBitsHi) & Mask) != 0)
881 return false;
882
883 // Merge the upper and lower halves of Imm and DemandedBits.
884 Imm |= Hi;
885 DemandedBits |= DemandedBitsHi;
886 }
887
888 ++NumOptimizedImms;
889
890 // Replicate the element across the register width.
891 while (EltSize < Size) {
892 NewImm |= NewImm << EltSize;
893 EltSize *= 2;
894 }
895
896 (void)OldImm;
897 assert(((OldImm ^ NewImm) & Demanded.getZExtValue()) == 0 &&
898 "demanded bits should never be altered");
899 assert(OldImm != NewImm && "the new imm shouldn't be equal to the old imm");
900
901 // Create the new constant immediate node.
902 EVT VT = Op.getValueType();
903 SDLoc DL(Op);
904 SDValue New;
905
906 // If the new constant immediate is all-zeros or all-ones, let the target
907 // independent DAG combine optimize this node.
908 if (NewImm == 0 || NewImm == OrigMask) {
909 New = TLO.DAG.getNode(Op.getOpcode(), DL, VT, Op.getOperand(0),
910 TLO.DAG.getConstant(NewImm, DL, VT));
911 // Otherwise, create a machine node so that target independent DAG combine
912 // doesn't undo this optimization.
913 } else {
914 Enc = AArch64_AM::encodeLogicalImmediate(NewImm, Size);
915 SDValue EncConst = TLO.DAG.getTargetConstant(Enc, DL, VT);
916 New = SDValue(
917 TLO.DAG.getMachineNode(NewOpc, DL, VT, Op.getOperand(0), EncConst), 0);
918 }
919
920 return TLO.CombineTo(Op, New);
921}
922
923bool AArch64TargetLowering::targetShrinkDemandedConstant(
924 SDValue Op, const APInt &Demanded, TargetLoweringOpt &TLO) const {
925 // Delay this optimization to as late as possible.
926 if (!TLO.LegalOps)
927 return false;
928
929 if (!EnableOptimizeLogicalImm)
930 return false;
931
932 EVT VT = Op.getValueType();
933 if (VT.isVector())
934 return false;
935
936 unsigned Size = VT.getSizeInBits();
937 assert((Size == 32 || Size == 64) &&
938 "i32 or i64 is expected after legalization.");
939
940 // Exit early if we demand all bits.
941 if (Demanded.countPopulation() == Size)
942 return false;
943
944 unsigned NewOpc;
945 switch (Op.getOpcode()) {
946 default:
947 return false;
948 case ISD::AND:
949 NewOpc = Size == 32 ? AArch64::ANDWri : AArch64::ANDXri;
950 break;
951 case ISD::OR:
952 NewOpc = Size == 32 ? AArch64::ORRWri : AArch64::ORRXri;
953 break;
954 case ISD::XOR:
955 NewOpc = Size == 32 ? AArch64::EORWri : AArch64::EORXri;
956 break;
957 }
958 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
959 if (!C)
960 return false;
961 uint64_t Imm = C->getZExtValue();
962 return optimizeLogicalImm(Op, Size, Imm, Demanded, TLO, NewOpc);
963}
964
965/// computeKnownBitsForTargetNode - Determine which of the bits specified in
966/// Mask are known to be either zero or one and return them Known.
967void AArch64TargetLowering::computeKnownBitsForTargetNode(
968 const SDValue Op, KnownBits &Known,
969 const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const {
970 switch (Op.getOpcode()) {
971 default:
972 break;
973 case AArch64ISD::CSEL: {
974 KnownBits Known2;
975 DAG.computeKnownBits(Op->getOperand(0), Known, Depth + 1);
976 DAG.computeKnownBits(Op->getOperand(1), Known2, Depth + 1);
977 Known.Zero &= Known2.Zero;
978 Known.One &= Known2.One;
979 break;
980 }
981 case ISD::INTRINSIC_W_CHAIN: {
982 ConstantSDNode *CN = cast<ConstantSDNode>(Op->getOperand(1));
983 Intrinsic::ID IntID = static_cast<Intrinsic::ID>(CN->getZExtValue());
984 switch (IntID) {
985 default: return;
986 case Intrinsic::aarch64_ldaxr:
987 case Intrinsic::aarch64_ldxr: {
988 unsigned BitWidth = Known.getBitWidth();
989 EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
990 unsigned MemBits = VT.getScalarSizeInBits();
991 Known.Zero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
992 return;
993 }
994 }
995 break;
996 }
997 case ISD::INTRINSIC_WO_CHAIN:
998 case ISD::INTRINSIC_VOID: {
999 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1000 switch (IntNo) {
1001 default:
1002 break;
1003 case Intrinsic::aarch64_neon_umaxv:
1004 case Intrinsic::aarch64_neon_uminv: {
1005 // Figure out the datatype of the vector operand. The UMINV instruction
1006 // will zero extend the result, so we can mark as known zero all the
1007 // bits larger than the element datatype. 32-bit or larger doesn't need
1008 // this as those are legal types and will be handled by isel directly.
1009 MVT VT = Op.getOperand(1).getValueType().getSimpleVT();
1010 unsigned BitWidth = Known.getBitWidth();
1011 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1012 assert(BitWidth >= 8 && "Unexpected width!");
1013 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 8);
1014 Known.Zero |= Mask;
1015 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1016 assert(BitWidth >= 16 && "Unexpected width!");
1017 APInt Mask = APInt::getHighBitsSet(BitWidth, BitWidth - 16);
1018 Known.Zero |= Mask;
1019 }
1020 break;
1021 } break;
1022 }
1023 }
1024 }
1025}
1026
1027MVT AArch64TargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1028 EVT) const {
1029 return MVT::i64;
1030}
1031
1032bool AArch64TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
1033 unsigned AddrSpace,
1034 unsigned Align,
1035 bool *Fast) const {
1036 if (Subtarget->requiresStrictAlign())
1037 return false;
1038
1039 if (Fast) {
1040 // Some CPUs are fine with unaligned stores except for 128-bit ones.
1041 *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
1042 // See comments in performSTORECombine() for more details about
1043 // these conditions.
1044
1045 // Code that uses clang vector extensions can mark that it
1046 // wants unaligned accesses to be treated as fast by
1047 // underspecifying alignment to be 1 or 2.
1048 Align <= 2 ||
1049
1050 // Disregard v2i64. Memcpy lowering produces those and splitting
1051 // them regresses performance on micro-benchmarks and olden/bh.
1052 VT == MVT::v2i64;
1053 }
1054 return true;
1055}
1056
1057FastISel *
1058AArch64TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1059 const TargetLibraryInfo *libInfo) const {
1060 return AArch64::createFastISel(funcInfo, libInfo);
1061}
1062
1063const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
1064 switch ((AArch64ISD::NodeType)Opcode) {
1065 case AArch64ISD::FIRST_NUMBER: break;
1066 case AArch64ISD::CALL: return "AArch64ISD::CALL";
1067 case AArch64ISD::ADRP: return "AArch64ISD::ADRP";
1068 case AArch64ISD::ADDlow: return "AArch64ISD::ADDlow";
1069 case AArch64ISD::LOADgot: return "AArch64ISD::LOADgot";
1070 case AArch64ISD::RET_FLAG: return "AArch64ISD::RET_FLAG";
1071 case AArch64ISD::BRCOND: return "AArch64ISD::BRCOND";
1072 case AArch64ISD::CSEL: return "AArch64ISD::CSEL";
1073 case AArch64ISD::FCSEL: return "AArch64ISD::FCSEL";
1074 case AArch64ISD::CSINV: return "AArch64ISD::CSINV";
1075 case AArch64ISD::CSNEG: return "AArch64ISD::CSNEG";
1076 case AArch64ISD::CSINC: return "AArch64ISD::CSINC";
1077 case AArch64ISD::THREAD_POINTER: return "AArch64ISD::THREAD_POINTER";
1078 case AArch64ISD::TLSDESC_CALLSEQ: return "AArch64ISD::TLSDESC_CALLSEQ";
1079 case AArch64ISD::ADC: return "AArch64ISD::ADC";
1080 case AArch64ISD::SBC: return "AArch64ISD::SBC";
1081 case AArch64ISD::ADDS: return "AArch64ISD::ADDS";
1082 case AArch64ISD::SUBS: return "AArch64ISD::SUBS";
1083 case AArch64ISD::ADCS: return "AArch64ISD::ADCS";
1084 case AArch64ISD::SBCS: return "AArch64ISD::SBCS";
1085 case AArch64ISD::ANDS: return "AArch64ISD::ANDS";
1086 case AArch64ISD::CCMP: return "AArch64ISD::CCMP";
1087 case AArch64ISD::CCMN: return "AArch64ISD::CCMN";
1088 case AArch64ISD::FCCMP: return "AArch64ISD::FCCMP";
1089 case AArch64ISD::FCMP: return "AArch64ISD::FCMP";
1090 case AArch64ISD::DUP: return "AArch64ISD::DUP";
1091 case AArch64ISD::DUPLANE8: return "AArch64ISD::DUPLANE8";
1092 case AArch64ISD::DUPLANE16: return "AArch64ISD::DUPLANE16";
1093 case AArch64ISD::DUPLANE32: return "AArch64ISD::DUPLANE32";
1094 case AArch64ISD::DUPLANE64: return "AArch64ISD::DUPLANE64";
1095 case AArch64ISD::MOVI: return "AArch64ISD::MOVI";
1096 case AArch64ISD::MOVIshift: return "AArch64ISD::MOVIshift";
1097 case AArch64ISD::MOVIedit: return "AArch64ISD::MOVIedit";
1098 case AArch64ISD::MOVImsl: return "AArch64ISD::MOVImsl";
1099 case AArch64ISD::FMOV: return "AArch64ISD::FMOV";
1100 case AArch64ISD::MVNIshift: return "AArch64ISD::MVNIshift";
1101 case AArch64ISD::MVNImsl: return "AArch64ISD::MVNImsl";
1102 case AArch64ISD::BICi: return "AArch64ISD::BICi";
1103 case AArch64ISD::ORRi: return "AArch64ISD::ORRi";
1104 case AArch64ISD::BSL: return "AArch64ISD::BSL";
1105 case AArch64ISD::NEG: return "AArch64ISD::NEG";
1106 case AArch64ISD::EXTR: return "AArch64ISD::EXTR";
1107 case AArch64ISD::ZIP1: return "AArch64ISD::ZIP1";
1108 case AArch64ISD::ZIP2: return "AArch64ISD::ZIP2";
1109 case AArch64ISD::UZP1: return "AArch64ISD::UZP1";
1110 case AArch64ISD::UZP2: return "AArch64ISD::UZP2";
1111 case AArch64ISD::TRN1: return "AArch64ISD::TRN1";
1112 case AArch64ISD::TRN2: return "AArch64ISD::TRN2";
1113 case AArch64ISD::REV16: return "AArch64ISD::REV16";
1114 case AArch64ISD::REV32: return "AArch64ISD::REV32";
1115 case AArch64ISD::REV64: return "AArch64ISD::REV64";
1116 case AArch64ISD::EXT: return "AArch64ISD::EXT";
1117 case AArch64ISD::VSHL: return "AArch64ISD::VSHL";
1118 case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR";
1119 case AArch64ISD::VASHR: return "AArch64ISD::VASHR";
1120 case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ";
1121 case AArch64ISD::CMGE: return "AArch64ISD::CMGE";
1122 case AArch64ISD::CMGT: return "AArch64ISD::CMGT";
1123 case AArch64ISD::CMHI: return "AArch64ISD::CMHI";
1124 case AArch64ISD::CMHS: return "AArch64ISD::CMHS";
1125 case AArch64ISD::FCMEQ: return "AArch64ISD::FCMEQ";
1126 case AArch64ISD::FCMGE: return "AArch64ISD::FCMGE";
1127 case AArch64ISD::FCMGT: return "AArch64ISD::FCMGT";
1128 case AArch64ISD::CMEQz: return "AArch64ISD::CMEQz";
1129 case AArch64ISD::CMGEz: return "AArch64ISD::CMGEz";
1130 case AArch64ISD::CMGTz: return "AArch64ISD::CMGTz";
1131 case AArch64ISD::CMLEz: return "AArch64ISD::CMLEz";
1132 case AArch64ISD::CMLTz: return "AArch64ISD::CMLTz";
1133 case AArch64ISD::FCMEQz: return "AArch64ISD::FCMEQz";
1134 case AArch64ISD::FCMGEz: return "AArch64ISD::FCMGEz";
1135 case AArch64ISD::FCMGTz: return "AArch64ISD::FCMGTz";
1136 case AArch64ISD::FCMLEz: return "AArch64ISD::FCMLEz";
1137 case AArch64ISD::FCMLTz: return "AArch64ISD::FCMLTz";
1138 case AArch64ISD::SADDV: return "AArch64ISD::SADDV";
1139 case AArch64ISD::UADDV: return "AArch64ISD::UADDV";
1140 case AArch64ISD::SMINV: return "AArch64ISD::SMINV";
1141 case AArch64ISD::UMINV: return "AArch64ISD::UMINV";
1142 case AArch64ISD::SMAXV: return "AArch64ISD::SMAXV";
1143 case AArch64ISD::UMAXV: return "AArch64ISD::UMAXV";
1144 case AArch64ISD::NOT: return "AArch64ISD::NOT";
1145 case AArch64ISD::BIT: return "AArch64ISD::BIT";
1146 case AArch64ISD::CBZ: return "AArch64ISD::CBZ";
1147 case AArch64ISD::CBNZ: return "AArch64ISD::CBNZ";
1148 case AArch64ISD::TBZ: return "AArch64ISD::TBZ";
1149 case AArch64ISD::TBNZ: return "AArch64ISD::TBNZ";
1150 case AArch64ISD::TC_RETURN: return "AArch64ISD::TC_RETURN";
1151 case AArch64ISD::PREFETCH: return "AArch64ISD::PREFETCH";
1152 case AArch64ISD::SITOF: return "AArch64ISD::SITOF";
1153 case AArch64ISD::UITOF: return "AArch64ISD::UITOF";
1154 case AArch64ISD::NVCAST: return "AArch64ISD::NVCAST";
1155 case AArch64ISD::SQSHL_I: return "AArch64ISD::SQSHL_I";
1156 case AArch64ISD::UQSHL_I: return "AArch64ISD::UQSHL_I";
1157 case AArch64ISD::SRSHR_I: return "AArch64ISD::SRSHR_I";
1158 case AArch64ISD::URSHR_I: return "AArch64ISD::URSHR_I";
1159 case AArch64ISD::SQSHLU_I: return "AArch64ISD::SQSHLU_I";
1160 case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge";
1161 case AArch64ISD::LD2post: return "AArch64ISD::LD2post";
1162 case AArch64ISD::LD3post: return "AArch64ISD::LD3post";
1163 case AArch64ISD::LD4post: return "AArch64ISD::LD4post";
1164 case AArch64ISD::ST2post: return "AArch64ISD::ST2post";
1165 case AArch64ISD::ST3post: return "AArch64ISD::ST3post";
1166 case AArch64ISD::ST4post: return "AArch64ISD::ST4post";
1167 case AArch64ISD::LD1x2post: return "AArch64ISD::LD1x2post";
1168 case AArch64ISD::LD1x3post: return "AArch64ISD::LD1x3post";
1169 case AArch64ISD::LD1x4post: return "AArch64ISD::LD1x4post";
1170 case AArch64ISD::ST1x2post: return "AArch64ISD::ST1x2post";
1171 case AArch64ISD::ST1x3post: return "AArch64ISD::ST1x3post";
1172 case AArch64ISD::ST1x4post: return "AArch64ISD::ST1x4post";
1173 case AArch64ISD::LD1DUPpost: return "AArch64ISD::LD1DUPpost";
1174 case AArch64ISD::LD2DUPpost: return "AArch64ISD::LD2DUPpost";
1175 case AArch64ISD::LD3DUPpost: return "AArch64ISD::LD3DUPpost";
1176 case AArch64ISD::LD4DUPpost: return "AArch64ISD::LD4DUPpost";
1177 case AArch64ISD::LD1LANEpost: return "AArch64ISD::LD1LANEpost";
1178 case AArch64ISD::LD2LANEpost: return "AArch64ISD::LD2LANEpost";
1179 case AArch64ISD::LD3LANEpost: return "AArch64ISD::LD3LANEpost";
1180 case AArch64ISD::LD4LANEpost: return "AArch64ISD::LD4LANEpost";
1181 case AArch64ISD::ST2LANEpost: return "AArch64ISD::ST2LANEpost";
1182 case AArch64ISD::ST3LANEpost: return "AArch64ISD::ST3LANEpost";
1183 case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
1184 case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
1185 case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
1186 case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
1187 case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
1188 case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
1189 case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
1190 }
1191 return nullptr;
1192}
1193
1194MachineBasicBlock *
1195AArch64TargetLowering::EmitF128CSEL(MachineInstr &MI,
1196 MachineBasicBlock *MBB) const {
1197 // We materialise the F128CSEL pseudo-instruction as some control flow and a
1198 // phi node:
1199
1200 // OrigBB:
1201 // [... previous instrs leading to comparison ...]
1202 // b.ne TrueBB
1203 // b EndBB
1204 // TrueBB:
1205 // ; Fallthrough
1206 // EndBB:
1207 // Dest = PHI [IfTrue, TrueBB], [IfFalse, OrigBB]
1208
1209 MachineFunction *MF = MBB->getParent();
1210 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1211 const BasicBlock *LLVM_BB = MBB->getBasicBlock();
1212 DebugLoc DL = MI.getDebugLoc();
1213 MachineFunction::iterator It = ++MBB->getIterator();
1214
1215 unsigned DestReg = MI.getOperand(0).getReg();
1216 unsigned IfTrueReg = MI.getOperand(1).getReg();
1217 unsigned IfFalseReg = MI.getOperand(2).getReg();
1218 unsigned CondCode = MI.getOperand(3).getImm();
1219 bool NZCVKilled = MI.getOperand(4).isKill();
1220
1221 MachineBasicBlock *TrueBB = MF->CreateMachineBasicBlock(LLVM_BB);
1222 MachineBasicBlock *EndBB = MF->CreateMachineBasicBlock(LLVM_BB);
1223 MF->insert(It, TrueBB);
1224 MF->insert(It, EndBB);
1225
1226 // Transfer rest of current basic-block to EndBB
1227 EndBB->splice(EndBB->begin(), MBB, std::next(MachineBasicBlock::iterator(MI)),
1228 MBB->end());
1229 EndBB->transferSuccessorsAndUpdatePHIs(MBB);
1230
1231 BuildMI(MBB, DL, TII->get(AArch64::Bcc)).addImm(CondCode).addMBB(TrueBB);
1232 BuildMI(MBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1233 MBB->addSuccessor(TrueBB);
1234 MBB->addSuccessor(EndBB);
1235
1236 // TrueBB falls through to the end.
1237 TrueBB->addSuccessor(EndBB);
1238
1239 if (!NZCVKilled) {
1240 TrueBB->addLiveIn(AArch64::NZCV);
1241 EndBB->addLiveIn(AArch64::NZCV);
1242 }
1243
1244 BuildMI(*EndBB, EndBB->begin(), DL, TII->get(AArch64::PHI), DestReg)
1245 .addReg(IfTrueReg)
1246 .addMBB(TrueBB)
1247 .addReg(IfFalseReg)
1248 .addMBB(MBB);
1249
1250 MI.eraseFromParent();
1251 return EndBB;
1252}
1253
1254MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter(
1255 MachineInstr &MI, MachineBasicBlock *BB) const {
1256 switch (MI.getOpcode()) {
1257 default:
1258#ifndef NDEBUG
1259 MI.dump();
1260#endif
1261 llvm_unreachable("Unexpected instruction for custom inserter!")::llvm::llvm_unreachable_internal("Unexpected instruction for custom inserter!"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1261)
;
1262
1263 case AArch64::F128CSEL:
1264 return EmitF128CSEL(MI, BB);
1265
1266 case TargetOpcode::STACKMAP:
1267 case TargetOpcode::PATCHPOINT:
1268 return emitPatchPoint(MI, BB);
1269 }
1270}
1271
1272//===----------------------------------------------------------------------===//
1273// AArch64 Lowering private implementation.
1274//===----------------------------------------------------------------------===//
1275
1276//===----------------------------------------------------------------------===//
1277// Lowering Code
1278//===----------------------------------------------------------------------===//
1279
1280/// changeIntCCToAArch64CC - Convert a DAG integer condition code to an AArch64
1281/// CC
1282static AArch64CC::CondCode changeIntCCToAArch64CC(ISD::CondCode CC) {
1283 switch (CC) {
1284 default:
1285 llvm_unreachable("Unknown condition code!")::llvm::llvm_unreachable_internal("Unknown condition code!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1285)
;
1286 case ISD::SETNE:
1287 return AArch64CC::NE;
1288 case ISD::SETEQ:
1289 return AArch64CC::EQ;
1290 case ISD::SETGT:
1291 return AArch64CC::GT;
1292 case ISD::SETGE:
1293 return AArch64CC::GE;
1294 case ISD::SETLT:
1295 return AArch64CC::LT;
1296 case ISD::SETLE:
1297 return AArch64CC::LE;
1298 case ISD::SETUGT:
1299 return AArch64CC::HI;
1300 case ISD::SETUGE:
1301 return AArch64CC::HS;
1302 case ISD::SETULT:
1303 return AArch64CC::LO;
1304 case ISD::SETULE:
1305 return AArch64CC::LS;
1306 }
1307}
1308
1309/// changeFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64 CC.
1310static void changeFPCCToAArch64CC(ISD::CondCode CC,
1311 AArch64CC::CondCode &CondCode,
1312 AArch64CC::CondCode &CondCode2) {
1313 CondCode2 = AArch64CC::AL;
1314 switch (CC) {
1315 default:
1316 llvm_unreachable("Unknown FP condition!")::llvm::llvm_unreachable_internal("Unknown FP condition!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1316)
;
1317 case ISD::SETEQ:
1318 case ISD::SETOEQ:
1319 CondCode = AArch64CC::EQ;
1320 break;
1321 case ISD::SETGT:
1322 case ISD::SETOGT:
1323 CondCode = AArch64CC::GT;
1324 break;
1325 case ISD::SETGE:
1326 case ISD::SETOGE:
1327 CondCode = AArch64CC::GE;
1328 break;
1329 case ISD::SETOLT:
1330 CondCode = AArch64CC::MI;
1331 break;
1332 case ISD::SETOLE:
1333 CondCode = AArch64CC::LS;
1334 break;
1335 case ISD::SETONE:
1336 CondCode = AArch64CC::MI;
1337 CondCode2 = AArch64CC::GT;
1338 break;
1339 case ISD::SETO:
1340 CondCode = AArch64CC::VC;
1341 break;
1342 case ISD::SETUO:
1343 CondCode = AArch64CC::VS;
1344 break;
1345 case ISD::SETUEQ:
1346 CondCode = AArch64CC::EQ;
1347 CondCode2 = AArch64CC::VS;
1348 break;
1349 case ISD::SETUGT:
1350 CondCode = AArch64CC::HI;
1351 break;
1352 case ISD::SETUGE:
1353 CondCode = AArch64CC::PL;
1354 break;
1355 case ISD::SETLT:
1356 case ISD::SETULT:
1357 CondCode = AArch64CC::LT;
1358 break;
1359 case ISD::SETLE:
1360 case ISD::SETULE:
1361 CondCode = AArch64CC::LE;
1362 break;
1363 case ISD::SETNE:
1364 case ISD::SETUNE:
1365 CondCode = AArch64CC::NE;
1366 break;
1367 }
1368}
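// For illustration (an assumed usage sketch, not code from this file): most FP
// conditions map to a single AArch64 condition code, but SETONE and SETUEQ need
// two, e.g.
//   AArch64CC::CondCode CC1, CC2;
//   changeFPCCToAArch64CC(ISD::SETONE, CC1, CC2); // CC1 == MI, CC2 == GT
// and the caller must combine the two tests ("lt || gt") itself, typically by
// emitting a second CSEL/branch when CC2 != AArch64CC::AL.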
1369
1370/// Convert a DAG fp condition code to an AArch64 CC.
1371/// This differs from changeFPCCToAArch64CC in that it returns cond codes that
1372/// should be AND'ed instead of OR'ed.
1373static void changeFPCCToANDAArch64CC(ISD::CondCode CC,
1374 AArch64CC::CondCode &CondCode,
1375 AArch64CC::CondCode &CondCode2) {
1376 CondCode2 = AArch64CC::AL;
1377 switch (CC) {
1378 default:
1379 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1380 assert(CondCode2 == AArch64CC::AL);
1381 break;
1382 case ISD::SETONE:
1383 // (a one b)
1384 // == ((a olt b) || (a ogt b))
1385 // == ((a ord b) && (a une b))
1386 CondCode = AArch64CC::VC;
1387 CondCode2 = AArch64CC::NE;
1388 break;
1389 case ISD::SETUEQ:
1390 // (a ueq b)
1391 // == ((a uno b) || (a oeq b))
1392 // == ((a ule b) && (a uge b))
1393 CondCode = AArch64CC::PL;
1394 CondCode2 = AArch64CC::LE;
1395 break;
1396 }
1397}
1398
1399/// changeVectorFPCCToAArch64CC - Convert a DAG fp condition code to an AArch64
1400/// CC usable with the vector instructions. Fewer operations are available
1401/// without a real NZCV register, so we have to use less efficient combinations
1402/// to get the same effect.
1403static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
1404 AArch64CC::CondCode &CondCode,
1405 AArch64CC::CondCode &CondCode2,
1406 bool &Invert) {
1407 Invert = false;
1408 switch (CC) {
1409 default:
1410 // Mostly the scalar mappings work fine.
1411 changeFPCCToAArch64CC(CC, CondCode, CondCode2);
1412 break;
1413 case ISD::SETUO:
1414 Invert = true;
1415 LLVM_FALLTHROUGH;
1416 case ISD::SETO:
1417 CondCode = AArch64CC::MI;
1418 CondCode2 = AArch64CC::GE;
1419 break;
1420 case ISD::SETUEQ:
1421 case ISD::SETULT:
1422 case ISD::SETULE:
1423 case ISD::SETUGT:
1424 case ISD::SETUGE:
1425 // All of the compare-mask comparisons are ordered, but we can switch
1426 // between the two by a double inversion. E.g. ULE == !OGT.
1427 Invert = true;
1428 changeFPCCToAArch64CC(getSetCCInverse(CC, false), CondCode, CondCode2);
1429 break;
1430 }
1431}
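// Example of the double inversion above (assumed operands a, b): "a ule b" has
// no direct compare-mask instruction, so it is emitted as the ordered inverse
// "a ogt b" (FCMGT) and the resulting mask is then NOT'ed because Invert is set.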
1432
1433static bool isLegalArithImmed(uint64_t C) {
1434 // Matches AArch64DAGToDAGISel::SelectArithImmed().
1435 bool IsLegal = (C >> 12 == 0) || ((C & 0xFFFULL) == 0 && C >> 24 == 0);
1436 DEBUG(dbgs() << "Is imm " << C << " legal: " << (IsLegal ? "yes\n" : "no\n"));
1437 return IsLegal;
1438}
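// Worked examples for the check above (assumed values): 0xFFF and 0xFFF000 are
// legal (a 12-bit immediate, optionally shifted left by 12), while 0x1001 and
// 0x1000001 are not and cannot be encoded directly.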
1439
1440static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1441 const SDLoc &dl, SelectionDAG &DAG) {
1442 EVT VT = LHS.getValueType();
1443 const bool FullFP16 =
1444 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1445
1446 if (VT.isFloatingPoint()) {
1447 assert(VT != MVT::f128);
1448 if (VT == MVT::f16 && !FullFP16) {
1449 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
1450 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
1451 VT = MVT::f32;
1452 }
1453 return DAG.getNode(AArch64ISD::FCMP, dl, VT, LHS, RHS);
1454 }
1455
1456 // The CMP instruction is just an alias for SUBS, and representing it as
1457 // SUBS means that it's possible to get CSE with subtract operations.
1458 // A later phase can perform the optimization of setting the destination
1459 // register to WZR/XZR if it ends up being unused.
1460 unsigned Opcode = AArch64ISD::SUBS;
1461
1462 if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
1463 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1464 // We'd like to combine a (CMP op1, (sub 0, op2)) into a CMN instruction on
1465 // the grounds that "op1 - (-op2) == op1 + op2". However, the C and V flags
1466 // can be set differently by this operation. It comes down to whether
1467 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
1468 // everything is fine. If not then the optimization is wrong. Thus general
1469 // comparisons are only valid if op2 != 0.
1470
1471 // So, finally, the only LLVM-native comparisons that don't mention C and V
1472 // are SETEQ and SETNE. They're the only ones we can safely use CMN for in
1473 // the absence of information about op2.
1474 Opcode = AArch64ISD::ADDS;
1475 RHS = RHS.getOperand(1);
1476 } else if (LHS.getOpcode() == ISD::AND && isNullConstant(RHS) &&
1477 !isUnsignedIntSetCC(CC)) {
1478 // Similarly, (CMP (and X, Y), 0) can be implemented with a TST
1479 // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
1480 // of the signed comparisons.
1481 Opcode = AArch64ISD::ANDS;
1482 RHS = LHS.getOperand(1);
1483 LHS = LHS.getOperand(0);
1484 }
1485
1486 return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
1487 .getValue(1);
1488}
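// Example of the CMN rewrite above (a sketch, assuming i32 values): for
//   (setcc a, (sub 0, b), seteq)
// the SUBS "cmp a, (0 - b)" becomes ADDS "cmn a, b"; this is only done for
// SETEQ/SETNE because the C and V flags can differ when b is 0.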
1489
1490/// \defgroup AArch64CCMP CMP;CCMP matching
1491///
1492/// These functions deal with the formation of CMP;CCMP;... sequences.
1493/// The CCMP/CCMN/FCCMP/FCCMPE instructions allow the conditional execution of
1494/// a comparison. They set the NZCV flags to a predefined value if their
1495/// predicate is false. This allows to express arbitrary conjunctions, for
1496/// example "cmp 0 (and (setCA (cmp A)) (setCB (cmp B))))"
1497/// expressed as:
1498/// cmp A
1499/// ccmp B, inv(CB), CA
1500/// check for CB flags
1501///
1502/// In general we can create code for arbitrary "... (and (and A B) C)"
1503/// sequences. We can also implement some "or" expressions, because "(or A B)"
1504/// is equivalent to "not (and (not A) (not B))" and we can implement some
1505/// negation operations:
1506/// We can negate the results of a single comparison by inverting the flags
1507/// used when the predicate fails and inverting the flags tested in the next
1508/// instruction; We can also negate the results of the whole previous
1509/// conditional compare sequence by inverting the flags tested in the next
1510/// instruction. However there is no way to negate the result of a partial
1511/// sequence.
1512///
1513/// Therefore on encountering an "or" expression we can negate the subtree on
1514/// one side and have to be able to push the negate to the leafs of the subtree
1515/// on the other side (see also the comments in code). As complete example:
1516/// "or (or (setCA (cmp A)) (setCB (cmp B)))
1517/// (and (setCC (cmp C)) (setCD (cmp D)))"
1518/// is transformed to
1519/// "not (and (not (and (setCC (cmp C)) (setCC (cmp D))))
1520/// (and (not (setCA (cmp A)) (not (setCB (cmp B))))))"
1521/// and implemented as:
1522/// cmp C
1523/// ccmp D, inv(CD), CC
1524/// ccmp A, CA, inv(CD)
1525/// ccmp B, CB, inv(CA)
1526/// check for CB flags
1527/// A counterexample is "or (and A B) (and C D)" which cannot be implemented
1528/// by conditional compare sequences.
1529/// @{
1530
1531/// Create a conditional comparison; Use CCMP, CCMN or FCCMP as appropriate.
1532static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
1533 ISD::CondCode CC, SDValue CCOp,
1534 AArch64CC::CondCode Predicate,
1535 AArch64CC::CondCode OutCC,
1536 const SDLoc &DL, SelectionDAG &DAG) {
1537 unsigned Opcode = 0;
1538 const bool FullFP16 =
1539 static_cast<const AArch64Subtarget &>(DAG.getSubtarget()).hasFullFP16();
1540
1541 if (LHS.getValueType().isFloatingPoint()) {
1542 assert(LHS.getValueType() != MVT::f128);
1543 if (LHS.getValueType() == MVT::f16 && !FullFP16) {
1544 LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
1545 RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
1546 }
1547 Opcode = AArch64ISD::FCCMP;
1548 } else if (RHS.getOpcode() == ISD::SUB) {
1549 SDValue SubOp0 = RHS.getOperand(0);
1550 if (isNullConstant(SubOp0) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
1551 // See emitComparison() on why we can only do this for SETEQ and SETNE.
1552 Opcode = AArch64ISD::CCMN;
1553 RHS = RHS.getOperand(1);
1554 }
1555 }
1556 if (Opcode == 0)
1557 Opcode = AArch64ISD::CCMP;
1558
1559 SDValue Condition = DAG.getConstant(Predicate, DL, MVT_CC);
1560 AArch64CC::CondCode InvOutCC = AArch64CC::getInvertedCondCode(OutCC);
1561 unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvOutCC);
1562 SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
1563 return DAG.getNode(Opcode, DL, MVT_CC, LHS, RHS, NZCVOp, Condition, CCOp);
1564}
1565
1566/// Returns true if @p Val is a tree of AND/OR/SETCC operations.
1567/// CanNegate is set to true if we can push a negate operation through
1568/// the tree in a way that we are left with AND operations and negate operations
1569/// at the leafs only, i.e. "not (or (or x y) z)" can be changed to
1570/// "and (and (not x) (not y)) (not z)"; "not (or (and x y) z)" cannot be
1571/// brought into such a form.
1572static bool isConjunctionDisjunctionTree(const SDValue Val, bool &CanNegate,
1573 unsigned Depth = 0) {
1574 if (!Val.hasOneUse())
1575 return false;
1576 unsigned Opcode = Val->getOpcode();
1577 if (Opcode == ISD::SETCC) {
1578 if (Val->getOperand(0).getValueType() == MVT::f128)
1579 return false;
1580 CanNegate = true;
1581 return true;
1582 }
1583 // Protect against exponential runtime and stack overflow.
1584 if (Depth > 6)
1585 return false;
1586 if (Opcode == ISD::AND || Opcode == ISD::OR) {
1587 SDValue O0 = Val->getOperand(0);
1588 SDValue O1 = Val->getOperand(1);
1589 bool CanNegateL;
1590 if (!isConjunctionDisjunctionTree(O0, CanNegateL, Depth+1))
1591 return false;
1592 bool CanNegateR;
1593 if (!isConjunctionDisjunctionTree(O1, CanNegateR, Depth+1))
1594 return false;
1595
1596 if (Opcode == ISD::OR) {
1597 // For an OR expression we need to be able to negate at least one side or
1598 // we cannot do the transformation at all.
1599 if (!CanNegateL && !CanNegateR)
1600 return false;
1601 // We can however change a (not (or x y)) to (and (not x) (not y)) if we
1602 // can negate the x and y subtrees.
1603 CanNegate = CanNegateL && CanNegateR;
1604 } else {
1605 // If the operands are OR expressions then we finally need to negate their
1606 // outputs; we can only do that for the operand emitted last, by negating
1607 // OutCC, not for both operands.
1608 bool NeedsNegOutL = O0->getOpcode() == ISD::OR;
1609 bool NeedsNegOutR = O1->getOpcode() == ISD::OR;
1610 if (NeedsNegOutL && NeedsNegOutR)
1611 return false;
1612 // We cannot negate an AND operation (it would become an OR),
1613 CanNegate = false;
1614 }
1615 return true;
1616 }
1617 return false;
1618}
1619
1620/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1621/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1622/// Tries to transform the given i1 producing node @p Val to a series of compare
1623/// and conditional compare operations. @returns an NZCV flags producing node
1624/// and sets @p OutCC to the flags that should be tested, or returns SDValue() if
1625/// the transformation was not possible.
1626/// On recursive invocations @p Negate may be set to true to have negation
1627/// effects pushed to the tree leafs; @p Predicate is an NZCV flag predicate
1628/// for the comparisons in the current subtree; the recursion depth is limited
1629/// to avoid stack overflow.
1630static SDValue emitConjunctionDisjunctionTreeRec(SelectionDAG &DAG, SDValue Val,
1631 AArch64CC::CondCode &OutCC, bool Negate, SDValue CCOp,
1632 AArch64CC::CondCode Predicate) {
1633 // We're at a tree leaf, produce a conditional comparison operation.
1634 unsigned Opcode = Val->getOpcode();
1635 if (Opcode == ISD::SETCC) {
1636 SDValue LHS = Val->getOperand(0);
1637 SDValue RHS = Val->getOperand(1);
1638 ISD::CondCode CC = cast<CondCodeSDNode>(Val->getOperand(2))->get();
1639 bool isInteger = LHS.getValueType().isInteger();
1640 if (Negate)
1641 CC = getSetCCInverse(CC, isInteger);
1642 SDLoc DL(Val);
1643 // Determine OutCC and handle FP special case.
1644 if (isInteger) {
1645 OutCC = changeIntCCToAArch64CC(CC);
1646 } else {
1647 assert(LHS.getValueType().isFloatingPoint());
1648 AArch64CC::CondCode ExtraCC;
1649 changeFPCCToANDAArch64CC(CC, OutCC, ExtraCC);
1650 // Some floating point conditions can't be tested with a single condition
1651 // code. Construct an additional comparison in this case.
1652 if (ExtraCC != AArch64CC::AL) {
1653 SDValue ExtraCmp;
1654 if (!CCOp.getNode())
1655 ExtraCmp = emitComparison(LHS, RHS, CC, DL, DAG);
1656 else
1657 ExtraCmp = emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate,
1658 ExtraCC, DL, DAG);
1659 CCOp = ExtraCmp;
1660 Predicate = ExtraCC;
1661 }
1662 }
1663
1664 // Produce a normal comparison if we are first in the chain
1665 if (!CCOp)
1666 return emitComparison(LHS, RHS, CC, DL, DAG);
1667 // Otherwise produce a ccmp.
1668 return emitConditionalComparison(LHS, RHS, CC, CCOp, Predicate, OutCC, DL,
1669 DAG);
1670 }
1671 assert((Opcode == ISD::AND || (Opcode == ISD::OR && Val->hasOneUse())) &&
1672 "Valid conjunction/disjunction tree");
1673
1674 // Check if both sides can be transformed.
1675 SDValue LHS = Val->getOperand(0);
1676 SDValue RHS = Val->getOperand(1);
1677
1678 // In case of an OR we need to negate our operands and the result.
1679 // (A v B) <=> not(not(A) ^ not(B))
1680 bool NegateOpsAndResult = Opcode == ISD::OR;
1681 // We can negate the results of all previous operations by inverting the
1682 // predicate flags giving us a free negation for one side. The other side
1683 // must be negatable by itself.
1684 if (NegateOpsAndResult) {
1685 // See which side we can negate.
1686 bool CanNegateL;
1687 bool isValidL = isConjunctionDisjunctionTree(LHS, CanNegateL);
1688 assert(isValidL && "Valid conjunction/disjunction tree");
1689 (void)isValidL;
1690
1691#ifndef NDEBUG
1692 bool CanNegateR;
1693 bool isValidR = isConjunctionDisjunctionTree(RHS, CanNegateR);
1694 assert(isValidR && "Valid conjunction/disjunction tree");
1695 assert((CanNegateL || CanNegateR) && "Valid conjunction/disjunction tree");
1696#endif
1697
1698 // Order the side which we cannot negate to RHS so we can emit it first.
1699 if (!CanNegateL)
1700 std::swap(LHS, RHS);
1701 } else {
1702 bool NeedsNegOutL = LHS->getOpcode() == ISD::OR;
1703 assert((!NeedsNegOutL || RHS->getOpcode() != ISD::OR) &&
1704 "Valid conjunction/disjunction tree");
1705 // Order the side where we need to negate the output flags to RHS so it
1706 // gets emitted first.
1707 if (NeedsNegOutL)
1708 std::swap(LHS, RHS);
1709 }
1710
1711 // Emit RHS. If we want to negate the tree we only need to push a negate
1712 // through if we are already in a PushNegate case, otherwise we can negate
1713 // the "flags to test" afterwards.
1714 AArch64CC::CondCode RHSCC;
1715 SDValue CmpR = emitConjunctionDisjunctionTreeRec(DAG, RHS, RHSCC, Negate,
1716 CCOp, Predicate);
1717 if (NegateOpsAndResult && !Negate)
1718 RHSCC = AArch64CC::getInvertedCondCode(RHSCC);
1719 // Emit LHS. We may need to negate it.
1720 SDValue CmpL = emitConjunctionDisjunctionTreeRec(DAG, LHS, OutCC,
1721 NegateOpsAndResult, CmpR,
1722 RHSCC);
1723 // If we transformed an OR to an AND then we have to negate the result
1724 // (or absorb the Negate parameter).
1725 if (NegateOpsAndResult && !Negate)
1726 OutCC = AArch64CC::getInvertedCondCode(OutCC);
1727 return CmpL;
1728}
1729
1730/// Emit conjunction or disjunction tree with the CMP/FCMP followed by a chain
1731/// of CCMP/CFCMP ops. See @ref AArch64CCMP.
1732/// \see emitConjunctionDisjunctionTreeRec().
1733static SDValue emitConjunctionDisjunctionTree(SelectionDAG &DAG, SDValue Val,
1734 AArch64CC::CondCode &OutCC) {
1735 bool CanNegate;
1736 if (!isConjunctionDisjunctionTree(Val, CanNegate))
1737 return SDValue();
1738
1739 return emitConjunctionDisjunctionTreeRec(DAG, Val, OutCC, false, SDValue(),
1740 AArch64CC::AL);
1741}
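// Illustrative output of the CMP;CCMP machinery above (a sketch under assumed
// i32 inputs w0/w1; the exact NZCV immediate comes from
// getNZCVToSatisfyCondCode): lowering
//   (and (setcc w0, 0, seteq) (setcc w1, 5, setgt))
// yields roughly
//   cmp  w0, #0
//   ccmp w1, #5, #4, eq   ; if "eq" fails, force flags that fail "gt"
//   cset w2, gt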
1742
1743/// @}
1744
1745static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
1746 SDValue &AArch64cc, SelectionDAG &DAG,
1747 const SDLoc &dl) {
1748 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
1749 EVT VT = RHS.getValueType();
1750 uint64_t C = RHSC->getZExtValue();
1751 if (!isLegalArithImmed(C)) {
1752 // Constant does not fit, try adjusting it by one?
1753 switch (CC) {
1754 default:
1755 break;
1756 case ISD::SETLT:
1757 case ISD::SETGE:
1758 if ((VT == MVT::i32 && C != 0x80000000 &&
1759 isLegalArithImmed((uint32_t)(C - 1))) ||
1760 (VT == MVT::i64 && C != 0x80000000ULL &&
1761 isLegalArithImmed(C - 1ULL))) {
1762 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
1763 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1764 RHS = DAG.getConstant(C, dl, VT);
1765 }
1766 break;
1767 case ISD::SETULT:
1768 case ISD::SETUGE:
1769 if ((VT == MVT::i32 && C != 0 &&
1770 isLegalArithImmed((uint32_t)(C - 1))) ||
1771 (VT == MVT::i64 && C != 0ULL && isLegalArithImmed(C - 1ULL))) {
1772 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
1773 C = (VT == MVT::i32) ? (uint32_t)(C - 1) : C - 1;
1774 RHS = DAG.getConstant(C, dl, VT);
1775 }
1776 break;
1777 case ISD::SETLE:
1778 case ISD::SETGT:
1779 if ((VT == MVT::i32 && C != INT32_MAX &&
1780 isLegalArithImmed((uint32_t)(C + 1))) ||
1781 (VT == MVT::i64 && C != INT64_MAX &&
1782 isLegalArithImmed(C + 1ULL))) {
1783 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
1784 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1785 RHS = DAG.getConstant(C, dl, VT);
1786 }
1787 break;
1788 case ISD::SETULE:
1789 case ISD::SETUGT:
1790 if ((VT == MVT::i32 && C != UINT32_MAX &&
1791 isLegalArithImmed((uint32_t)(C + 1))) ||
1792 (VT == MVT::i64 && C != UINT64_MAX &&
1793 isLegalArithImmed(C + 1ULL))) {
1794 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
1795 C = (VT == MVT::i32) ? (uint32_t)(C + 1) : C + 1;
1796 RHS = DAG.getConstant(C, dl, VT);
1797 }
1798 break;
1799 }
1800 }
1801 }
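// Example of the adjustment above (assumed values): "x s< 4097" uses an
// unencodable immediate, but rewriting it as "x s<= 4096" is equivalent and
// 4096 (0x1000) is a legal shifted 12-bit immediate.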
1802 SDValue Cmp;
1803 AArch64CC::CondCode AArch64CC;
1804 if ((CC == ISD::SETEQ || CC == ISD::SETNE) && isa<ConstantSDNode>(RHS)) {
1805 const ConstantSDNode *RHSC = cast<ConstantSDNode>(RHS);
1806
1807 // The imm operand of ADDS is an unsigned immediate, in the range 0 to 4095.
1808 // For the i8 operand, the largest immediate is 255, so this can be easily
1809 // encoded in the compare instruction. For the i16 operand, however, the
1810 // largest immediate cannot be encoded in the compare.
1811 // Therefore, use a sign extending load and cmn to avoid materializing the
1812 // -1 constant. For example,
1813 // movz w1, #65535
1814 // ldrh w0, [x0, #0]
1815 // cmp w0, w1
1816 // >
1817 // ldrsh w0, [x0, #0]
1818 // cmn w0, #1
1819 // Fundamentally, we're relying on the property that (zext LHS) == (zext RHS)
1820 // if and only if (sext LHS) == (sext RHS). The checks are in place to
1821 // ensure both the LHS and RHS are truly zero extended and to make sure the
1822 // transformation is profitable.
1823 if ((RHSC->getZExtValue() >> 16 == 0) && isa<LoadSDNode>(LHS) &&
1824 cast<LoadSDNode>(LHS)->getExtensionType() == ISD::ZEXTLOAD &&
1825 cast<LoadSDNode>(LHS)->getMemoryVT() == MVT::i16 &&
1826 LHS.getNode()->hasNUsesOfValue(1, 0)) {
1827 int16_t ValueofRHS = cast<ConstantSDNode>(RHS)->getZExtValue();
1828 if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
1829 SDValue SExt =
1830 DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
1831 DAG.getValueType(MVT::i16));
1832 Cmp = emitComparison(SExt, DAG.getConstant(ValueofRHS, dl,
1833 RHS.getValueType()),
1834 CC, dl, DAG);
1835 AArch64CC = changeIntCCToAArch64CC(CC);
1836 }
1837 }
1838
1839 if (!Cmp && (RHSC->isNullValue() || RHSC->isOne())) {
1840 if ((Cmp = emitConjunctionDisjunctionTree(DAG, LHS, AArch64CC))) {
1841 if ((CC == ISD::SETNE) ^ RHSC->isNullValue())
1842 AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
1843 }
1844 }
1845 }
1846
1847 if (!Cmp) {
1848 Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
1849 AArch64CC = changeIntCCToAArch64CC(CC);
1850 }
1851 AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
1852 return Cmp;
1853}
1854
1855static std::pair<SDValue, SDValue>
1856getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
1857 assert((Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::i64) &&
1858 "Unsupported value type");
1859 SDValue Value, Overflow;
1860 SDLoc DL(Op);
1861 SDValue LHS = Op.getOperand(0);
1862 SDValue RHS = Op.getOperand(1);
1863 unsigned Opc = 0;
1864 switch (Op.getOpcode()) {
1865 default:
1866 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 1866)
;
1867 case ISD::SADDO:
1868 Opc = AArch64ISD::ADDS;
1869 CC = AArch64CC::VS;
1870 break;
1871 case ISD::UADDO:
1872 Opc = AArch64ISD::ADDS;
1873 CC = AArch64CC::HS;
1874 break;
1875 case ISD::SSUBO:
1876 Opc = AArch64ISD::SUBS;
1877 CC = AArch64CC::VS;
1878 break;
1879 case ISD::USUBO:
1880 Opc = AArch64ISD::SUBS;
1881 CC = AArch64CC::LO;
1882 break;
1883 // Multiply needs a little bit extra work.
1884 case ISD::SMULO:
1885 case ISD::UMULO: {
1886 CC = AArch64CC::NE;
1887 bool IsSigned = Op.getOpcode() == ISD::SMULO;
1888 if (Op.getValueType() == MVT::i32) {
1889 unsigned ExtendOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1890 // For a 32 bit multiply with overflow check we want the instruction
1891 // selector to generate a widening multiply (SMADDL/UMADDL). For that we
1892 // need to generate the following pattern:
1893 // (i64 add 0, (i64 mul (i64 sext|zext i32 %a), (i64 sext|zext i32 %b))
1894 LHS = DAG.getNode(ExtendOpc, DL, MVT::i64, LHS);
1895 RHS = DAG.getNode(ExtendOpc, DL, MVT::i64, RHS);
1896 SDValue Mul = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1897 SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Mul,
1898 DAG.getConstant(0, DL, MVT::i64));
1899 // On AArch64 the upper 32 bits are always zero extended for a 32 bit
1900 // operation. We need to clear out the upper 32 bits, because we used a
1901 // widening multiply that wrote all 64 bits. In the end this should be a
1902 // noop.
1903 Value = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Add);
1904 if (IsSigned) {
1905 // The signed overflow check requires more than just a simple check for
1906 // any bit set in the upper 32 bits of the result. These bits could be
1907 // just the sign bits of a negative number. To perform the overflow
1908 // check we arithmetically shift the lower 32 bits of the result right by
1909 // 31 and then compare them to the upper 32 bits.
1910 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Add,
1911 DAG.getConstant(32, DL, MVT::i64));
1912 UpperBits = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, UpperBits);
1913 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i32, Value,
1914 DAG.getConstant(31, DL, MVT::i64));
1915 // It is important that LowerBits is last, otherwise the arithmetic
1916 // shift will not be folded into the compare (SUBS).
1917 SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32);
1918 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1919 .getValue(1);
1920 } else {
1921 // The overflow check for unsigned multiply is easy. We only need to
1922 // check if any of the upper 32 bits are set. This can be done with a
1923 // CMP (shifted register). For that we need to generate the following
1924 // pattern:
1925 // (i64 AArch64ISD::SUBS i64 0, (i64 srl i64 %Mul, i64 32)
1926 SDValue UpperBits = DAG.getNode(ISD::SRL, DL, MVT::i64, Mul,
1927 DAG.getConstant(32, DL, MVT::i64));
1928 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1929 Overflow =
1930 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1931 DAG.getConstant(0, DL, MVT::i64),
1932 UpperBits).getValue(1);
1933 }
1934 break;
1935 }
1936 assert(Op.getValueType() == MVT::i64 && "Expected an i64 value type");
1937 // For the 64 bit multiply
1938 Value = DAG.getNode(ISD::MUL, DL, MVT::i64, LHS, RHS);
1939 if (IsSigned) {
1940 SDValue UpperBits = DAG.getNode(ISD::MULHS, DL, MVT::i64, LHS, RHS);
1941 SDValue LowerBits = DAG.getNode(ISD::SRA, DL, MVT::i64, Value,
1942 DAG.getConstant(63, DL, MVT::i64));
1943 // It is important that LowerBits is last, otherwise the arithmetic
1944 // shift will not be folded into the compare (SUBS).
1945 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1946 Overflow = DAG.getNode(AArch64ISD::SUBS, DL, VTs, UpperBits, LowerBits)
1947 .getValue(1);
1948 } else {
1949 SDValue UpperBits = DAG.getNode(ISD::MULHU, DL, MVT::i64, LHS, RHS);
1950 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::i32);
1951 Overflow =
1952 DAG.getNode(AArch64ISD::SUBS, DL, VTs,
1953 DAG.getConstant(0, DL, MVT::i64),
1954 UpperBits).getValue(1);
1955 }
1956 break;
1957 }
1958 } // switch (...)
1959
1960 if (Opc) {
1961 SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32);
1962
1963 // Emit the AArch64 operation with overflow check.
1964 Value = DAG.getNode(Opc, DL, VTs, LHS, RHS);
1965 Overflow = Value.getValue(1);
1966 }
1967 return std::make_pair(Value, Overflow);
1968}
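// Rough shape of the i32 signed-multiply overflow check built above (a sketch
// with assumed operands %a and %b, written as IR-like pseudocode):
//   %wide  = mul i64 (sext %a), (sext %b)
//   %value = trunc i64 %wide to i32
//   %ovf   = (trunc (lshr %wide, 32)) != (ashr %value, 31)
// i.e. the product overflows i32 iff its upper 32 bits are not the sign
// extension of its lower 32 bits.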
1969
1970SDValue AArch64TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
1971 RTLIB::Libcall Call) const {
1972 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
1973 return makeLibCall(DAG, Call, MVT::f128, Ops, false, SDLoc(Op)).first;
1974}
1975
1976// Returns true if the given Op is the overflow flag result of an overflow
1977// intrinsic operation.
1978static bool isOverflowIntrOpRes(SDValue Op) {
1979 unsigned Opc = Op.getOpcode();
1980 return (Op.getResNo() == 1 &&
1981 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
1982 Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO));
1983}
1984
1985static SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) {
1986 SDValue Sel = Op.getOperand(0);
1987 SDValue Other = Op.getOperand(1);
1988 SDLoc dl(Sel);
1989
1990 // If the operand is an overflow checking operation, invert the condition
1991 // code and kill the Not operation. I.e., transform:
1992 // (xor (overflow_op_bool, 1))
1993 // -->
1994 // (csel 1, 0, invert(cc), overflow_op_bool)
1995 // ... which later gets transformed to just a cset instruction with an
1996 // inverted condition code, rather than a cset + eor sequence.
1997 if (isOneConstant(Other) && isOverflowIntrOpRes(Sel)) {
1998 // Only lower legal XALUO ops.
1999 if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
2000 return SDValue();
2001
2002 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2003 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2004 AArch64CC::CondCode CC;
2005 SDValue Value, Overflow;
2006 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
2007 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2008 return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
2009 CCVal, Overflow);
2010 }
2011 // If neither operand is a SELECT_CC, give up.
2012 if (Sel.getOpcode() != ISD::SELECT_CC)
2013 std::swap(Sel, Other);
2014 if (Sel.getOpcode() != ISD::SELECT_CC)
2015 return Op;
2016
2017 // The folding we want to perform is:
2018 // (xor x, (select_cc a, b, cc, 0, -1) )
2019 // -->
2020 // (csel x, (xor x, -1), cc ...)
2021 //
2022 // The latter will get matched to a CSINV instruction.
2023
2024 ISD::CondCode CC = cast<CondCodeSDNode>(Sel.getOperand(4))->get();
2025 SDValue LHS = Sel.getOperand(0);
2026 SDValue RHS = Sel.getOperand(1);
2027 SDValue TVal = Sel.getOperand(2);
2028 SDValue FVal = Sel.getOperand(3);
2029
2030 // FIXME: This could be generalized to non-integer comparisons.
2031 if (LHS.getValueType() != MVT::i32 && LHS.getValueType() != MVT::i64)
2032 return Op;
2033
2034 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
2035 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
2036
2037 // The values aren't constants, this isn't the pattern we're looking for.
2038 if (!CFVal || !CTVal)
2039 return Op;
2040
2041 // We can commute the SELECT_CC by inverting the condition. This
2042 // might be needed to make this fit into a CSINV pattern.
2043 if (CTVal->isAllOnesValue() && CFVal->isNullValue()) {
2044 std::swap(TVal, FVal);
2045 std::swap(CTVal, CFVal);
2046 CC = ISD::getSetCCInverse(CC, true);
2047 }
2048
2049 // If the constants line up, perform the transform!
2050 if (CTVal->isNullValue() && CFVal->isAllOnesValue()) {
2051 SDValue CCVal;
2052 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
2053
2054 FVal = Other;
2055 TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
2056 DAG.getConstant(-1ULL, dl, Other.getValueType()));
2057
2058 return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
2059 CCVal, Cmp);
2060 }
2061
2062 return Op;
2063}
2064
2065static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
2066 EVT VT = Op.getValueType();
2067
2068 // Let legalize expand this if it isn't a legal type yet.
2069 if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
2070 return SDValue();
2071
2072 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
2073
2074 unsigned Opc;
2075 bool ExtraOp = false;
2076 switch (Op.getOpcode()) {
2077 default:
2078 llvm_unreachable("Invalid code")::llvm::llvm_unreachable_internal("Invalid code", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2078)
;
2079 case ISD::ADDC:
2080 Opc = AArch64ISD::ADDS;
2081 break;
2082 case ISD::SUBC:
2083 Opc = AArch64ISD::SUBS;
2084 break;
2085 case ISD::ADDE:
2086 Opc = AArch64ISD::ADCS;
2087 ExtraOp = true;
2088 break;
2089 case ISD::SUBE:
2090 Opc = AArch64ISD::SBCS;
2091 ExtraOp = true;
2092 break;
2093 }
2094
2095 if (!ExtraOp)
2096 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1));
2097 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1),
2098 Op.getOperand(2));
2099}
2100
2101static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
2102 // Let legalize expand this if it isn't a legal type yet.
2103 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
2104 return SDValue();
2105
2106 SDLoc dl(Op);
2107 AArch64CC::CondCode CC;
2108 // The actual operation that sets the overflow or carry flag.
2109 SDValue Value, Overflow;
2110 std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
2111
2112 // We use 0 and 1 as false and true values.
2113 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
2114 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
2115
2116 // We use an inverted condition, because the conditional select is inverted
2117 // too. This will allow it to be selected to a single instruction:
2118 // CSINC Wd, WZR, WZR, invert(cond).
2119 SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
2120 Overflow = DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal,
2121 CCVal, Overflow);
2122
2123 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
2124 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
2125}
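// Illustrative selection for a legal i32 uadd.with.overflow (a sketch, not
// output asserted by this file):
//   adds w0, w0, w1        ; Value, sets flags
//   cset w1, hs            ; the inverted-condition CSEL becomes CSINC/CSET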
2126
2127// Prefetch operands are:
2128// 1: Address to prefetch
2129// 2: bool isWrite
2130// 3: int locality (0 = no locality ... 3 = extreme locality)
2131// 4: bool isDataCache
2132static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
2133 SDLoc DL(Op);
2134 unsigned IsWrite = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
2135 unsigned Locality = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
2136 unsigned IsData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
2137
2138 bool IsStream = !Locality;
2139 // When the locality number is set
2140 if (Locality) {
2141 // The front-end should have filtered out the out-of-range values
2142 assert(Locality <= 3 && "Prefetch locality out-of-range");
2143 // The locality degree is the opposite of the cache speed.
2144 // Put the number the other way around.
2145 // The encoding starts at 0 for level 1
2146 Locality = 3 - Locality;
2147 }
2148
2149 // Build the mask value encoding the expected behavior.
2150 unsigned PrfOp = (IsWrite << 4) | // Load/Store bit
2151 (!IsData << 3) | // IsDataCache bit
2152 (Locality << 1) | // Cache level bits
2153 (unsigned)IsStream; // Stream bit
2154 return DAG.getNode(AArch64ISD::PREFETCH, DL, MVT::Other, Op.getOperand(0),
2155 DAG.getConstant(PrfOp, DL, MVT::i32), Op.getOperand(1));
2156}
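// Worked encoding for the mask above (assumed example): a read, data-cache
// prefetch with locality 3 gives IsWrite=0, !IsData=0, Locality=3-3=0 and
// IsStream=0, so PrfOp is 0b00000 (PLDL1KEEP); with locality 0 only the stream
// bit is set, giving 0b00001 (PLDL1STRM).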
2157
2158SDValue AArch64TargetLowering::LowerFP_EXTEND(SDValue Op,
2159 SelectionDAG &DAG) const {
2160 assert(Op.getValueType() == MVT::f128 && "Unexpected lowering");
2161
2162 RTLIB::Libcall LC;
2163 LC = RTLIB::getFPEXT(Op.getOperand(0).getValueType(), Op.getValueType());
2164
2165 return LowerF128Call(Op, DAG, LC);
2166}
2167
2168SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
2169 SelectionDAG &DAG) const {
2170 if (Op.getOperand(0).getValueType() != MVT::f128) {
2171 // It's legal except when f128 is involved
2172 return Op;
2173 }
2174
2175 RTLIB::Libcall LC;
2176 LC = RTLIB::getFPROUND(Op.getOperand(0).getValueType(), Op.getValueType());
2177
2178 // FP_ROUND node has a second operand indicating whether it is known to be
2179 // precise. That doesn't take part in the LibCall so we can't directly use
2180 // LowerF128Call.
2181 SDValue SrcVal = Op.getOperand(0);
2182 return makeLibCall(DAG, LC, Op.getValueType(), SrcVal, /*isSigned*/ false,
2183 SDLoc(Op)).first;
2184}
2185
2186static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
2187 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2188 // Any additional optimization in this function should be recorded
2189 // in the cost tables.
2190 EVT InVT = Op.getOperand(0).getValueType();
2191 EVT VT = Op.getValueType();
2192 unsigned NumElts = InVT.getVectorNumElements();
2193
2194 // f16 vectors are promoted to f32 before a conversion.
2195 if (InVT.getVectorElementType() == MVT::f16) {
2196 MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts);
2197 SDLoc dl(Op);
2198 return DAG.getNode(
2199 Op.getOpcode(), dl, Op.getValueType(),
2200 DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
2201 }
2202
2203 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2204 SDLoc dl(Op);
2205 SDValue Cv =
2206 DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
2207 Op.getOperand(0));
2208 return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
2209 }
2210
2211 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2212 SDLoc dl(Op);
2213 MVT ExtVT =
2214 MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
2215 VT.getVectorNumElements());
2216 SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
2217 return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
2218 }
2219
2220 // Type changing conversions are illegal.
2221 return Op;
2222}
2223
2224SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
2225 SelectionDAG &DAG) const {
2226 if (Op.getOperand(0).getValueType().isVector())
2227 return LowerVectorFP_TO_INT(Op, DAG);
2228
2229 // f16 conversions are promoted to f32 when full fp16 is not supported.
2230 if (Op.getOperand(0).getValueType() == MVT::f16 &&
2231 !Subtarget->hasFullFP16()) {
2232 SDLoc dl(Op);
2233 return DAG.getNode(
2234 Op.getOpcode(), dl, Op.getValueType(),
2235 DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Op.getOperand(0)));
2236 }
2237
2238 if (Op.getOperand(0).getValueType() != MVT::f128) {
2239 // It's legal except when f128 is involved
2240 return Op;
2241 }
2242
2243 RTLIB::Libcall LC;
2244 if (Op.getOpcode() == ISD::FP_TO_SINT)
2245 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(), Op.getValueType());
2246 else
2247 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(), Op.getValueType());
2248
2249 SmallVector<SDValue, 2> Ops(Op->op_begin(), Op->op_end());
2250 return makeLibCall(DAG, LC, Op.getValueType(), Ops, false, SDLoc(Op)).first;
2251}
2252
2253static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
2254 // Warning: We maintain cost tables in AArch64TargetTransformInfo.cpp.
2255 // Any additional optimization in this function should be recorded
2256 // in the cost tables.
2257 EVT VT = Op.getValueType();
2258 SDLoc dl(Op);
2259 SDValue In = Op.getOperand(0);
2260 EVT InVT = In.getValueType();
2261
2262 if (VT.getSizeInBits() < InVT.getSizeInBits()) {
2263 MVT CastVT =
2264 MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
2265 InVT.getVectorNumElements());
2266 In = DAG.getNode(Op.getOpcode(), dl, CastVT, In);
2267 return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0, dl));
2268 }
2269
2270 if (VT.getSizeInBits() > InVT.getSizeInBits()) {
2271 unsigned CastOpc =
2272 Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
2273 EVT CastVT = VT.changeVectorElementTypeToInteger();
2274 In = DAG.getNode(CastOpc, dl, CastVT, In);
2275 return DAG.getNode(Op.getOpcode(), dl, VT, In);
2276 }
2277
2278 return Op;
2279}
2280
2281SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
2282 SelectionDAG &DAG) const {
2283 if (Op.getValueType().isVector())
2284 return LowerVectorINT_TO_FP(Op, DAG);
2285
2286 // f16 conversions are promoted to f32 when full fp16 is not supported.
2287 if (Op.getValueType() == MVT::f16 &&
2288 !Subtarget->hasFullFP16()) {
2289 SDLoc dl(Op);
2290 return DAG.getNode(
2291 ISD::FP_ROUND, dl, MVT::f16,
2292 DAG.getNode(Op.getOpcode(), dl, MVT::f32, Op.getOperand(0)),
2293 DAG.getIntPtrConstant(0, dl));
2294 }
2295
2296 // i128 conversions are libcalls.
2297 if (Op.getOperand(0).getValueType() == MVT::i128)
2298 return SDValue();
2299
2300 // Other conversions are legal, unless it's to the completely software-based
2301 // fp128.
2302 if (Op.getValueType() != MVT::f128)
2303 return Op;
2304
2305 RTLIB::Libcall LC;
2306 if (Op.getOpcode() == ISD::SINT_TO_FP)
2307 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2308 else
2309 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(), Op.getValueType());
2310
2311 return LowerF128Call(Op, DAG, LC);
2312}
2313
2314SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
2315 SelectionDAG &DAG) const {
2316 // For iOS, we want to call an alternative entry point: __sincos_stret,
2317 // which returns the values in two S / D registers.
2318 SDLoc dl(Op);
2319 SDValue Arg = Op.getOperand(0);
2320 EVT ArgVT = Arg.getValueType();
2321 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
2322
2323 ArgListTy Args;
2324 ArgListEntry Entry;
2325
2326 Entry.Node = Arg;
2327 Entry.Ty = ArgTy;
2328 Entry.IsSExt = false;
2329 Entry.IsZExt = false;
2330 Args.push_back(Entry);
2331
2332 RTLIB::Libcall LC = ArgVT == MVT::f64 ? RTLIB::SINCOS_STRET_F64
2333 : RTLIB::SINCOS_STRET_F32;
2334 const char *LibcallName = getLibcallName(LC);
2335 SDValue Callee =
2336 DAG.getExternalSymbol(LibcallName, getPointerTy(DAG.getDataLayout()));
2337
2338 StructType *RetTy = StructType::get(ArgTy, ArgTy);
2339 TargetLowering::CallLoweringInfo CLI(DAG);
2340 CLI.setDebugLoc(dl)
2341 .setChain(DAG.getEntryNode())
2342 .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
2343
2344 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2345 return CallResult.first;
2346}
2347
2348static SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) {
2349 if (Op.getValueType() != MVT::f16)
2350 return SDValue();
2351
2352 assert(Op.getOperand(0).getValueType() == MVT::i16);
2353 SDLoc DL(Op);
2354
2355 Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op.getOperand(0));
2356 Op = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Op);
2357 return SDValue(
2358 DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::f16, Op,
2359 DAG.getTargetConstant(AArch64::hsub, DL, MVT::i32)),
2360 0);
2361}
2362
2363static EVT getExtensionTo64Bits(const EVT &OrigVT) {
2364 if (OrigVT.getSizeInBits() >= 64)
2365 return OrigVT;
2366
2367 assert(OrigVT.isSimple() && "Expecting a simple value type");
2368
2369 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
2370 switch (OrigSimpleTy) {
2371 default: llvm_unreachable("Unexpected Vector Type");
2372 case MVT::v2i8:
2373 case MVT::v2i16:
2374 return MVT::v2i32;
2375 case MVT::v4i8:
2376 return MVT::v4i16;
2377 }
2378}
2379
2380static SDValue addRequiredExtensionForVectorMULL(SDValue N, SelectionDAG &DAG,
2381 const EVT &OrigTy,
2382 const EVT &ExtTy,
2383 unsigned ExtOpcode) {
2384 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
2385 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
2386 // 64-bits we need to insert a new extension so that it will be 64-bits.
2387 assert(ExtTy.is128BitVector() && "Unexpected extension size");
2388 if (OrigTy.getSizeInBits() >= 64)
2389 return N;
2390
2391 // Must extend size to at least 64 bits to be used as an operand for VMULL.
2392 EVT NewVT = getExtensionTo64Bits(OrigTy);
2393
2394 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
2395}
2396
2397static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
2398 bool isSigned) {
2399 EVT VT = N->getValueType(0);
2400
2401 if (N->getOpcode() != ISD::BUILD_VECTOR)
2402 return false;
2403
2404 for (const SDValue &Elt : N->op_values()) {
2405 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2406 unsigned EltSize = VT.getScalarSizeInBits();
2407 unsigned HalfSize = EltSize / 2;
2408 if (isSigned) {
2409 if (!isIntN(HalfSize, C->getSExtValue()))
2410 return false;
2411 } else {
2412 if (!isUIntN(HalfSize, C->getZExtValue()))
2413 return false;
2414 }
2415 continue;
2416 }
2417 return false;
2418 }
2419
2420 return true;
2421}
2422
2423static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
2424 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
2425 return addRequiredExtensionForVectorMULL(N->getOperand(0), DAG,
2426 N->getOperand(0)->getValueType(0),
2427 N->getValueType(0),
2428 N->getOpcode());
2429
2430 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
2431 EVT VT = N->getValueType(0);
2432 SDLoc dl(N);
2433 unsigned EltSize = VT.getScalarSizeInBits() / 2;
2434 unsigned NumElts = VT.getVectorNumElements();
2435 MVT TruncVT = MVT::getIntegerVT(EltSize);
2436 SmallVector<SDValue, 8> Ops;
2437 for (unsigned i = 0; i != NumElts; ++i) {
2438 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
2439 const APInt &CInt = C->getAPIntValue();
2440 // Element types smaller than 32 bits are not legal, so use i32 elements.
2441 // The values are implicitly truncated so sext vs. zext doesn't matter.
2442 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
2443 }
2444 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
2445}
2446
2447static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
2448 return N->getOpcode() == ISD::SIGN_EXTEND ||
2449 isExtendedBUILD_VECTOR(N, DAG, true);
2450}
2451
2452static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
2453 return N->getOpcode() == ISD::ZERO_EXTEND ||
2454 isExtendedBUILD_VECTOR(N, DAG, false);
2455}
2456
2457static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
2458 unsigned Opcode = N->getOpcode();
2459 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2460 SDNode *N0 = N->getOperand(0).getNode();
2461 SDNode *N1 = N->getOperand(1).getNode();
2462 return N0->hasOneUse() && N1->hasOneUse() &&
2463 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
2464 }
2465 return false;
2466}
2467
2468static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
2469 unsigned Opcode = N->getOpcode();
2470 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
2471 SDNode *N0 = N->getOperand(0).getNode();
2472 SDNode *N1 = N->getOperand(1).getNode();
2473 return N0->hasOneUse() && N1->hasOneUse() &&
2474 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
2475 }
2476 return false;
2477}
2478
2479static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
2480 // Multiplications are only custom-lowered for 128-bit vectors so that
2481 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
2482 EVT VT = Op.getValueType();
2483 assert(VT.is128BitVector() && VT.isInteger() &&
2484 "unexpected type for custom-lowering ISD::MUL");
2485 SDNode *N0 = Op.getOperand(0).getNode();
2486 SDNode *N1 = Op.getOperand(1).getNode();
2487 unsigned NewOpc = 0;
2488 bool isMLA = false;
2489 bool isN0SExt = isSignExtended(N0, DAG);
2490 bool isN1SExt = isSignExtended(N1, DAG);
2491 if (isN0SExt && isN1SExt)
2492 NewOpc = AArch64ISD::SMULL;
2493 else {
2494 bool isN0ZExt = isZeroExtended(N0, DAG);
2495 bool isN1ZExt = isZeroExtended(N1, DAG);
2496 if (isN0ZExt && isN1ZExt)
2497 NewOpc = AArch64ISD::UMULL;
2498 else if (isN1SExt || isN1ZExt) {
2499 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
2500 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
2501 if (isN1SExt && isAddSubSExt(N0, DAG)) {
2502 NewOpc = AArch64ISD::SMULL;
2503 isMLA = true;
2504 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
2505 NewOpc = AArch64ISD::UMULL;
2506 isMLA = true;
2507 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
2508 std::swap(N0, N1);
2509 NewOpc = AArch64ISD::UMULL;
2510 isMLA = true;
2511 }
2512 }
2513
2514 if (!NewOpc) {
2515 if (VT == MVT::v2i64)
2516 // Fall through to expand this. It is not legal.
2517 return SDValue();
2518 else
2519 // Other vector multiplications are legal.
2520 return Op;
2521 }
2522 }
2523
2524 // Legalize to a S/UMULL instruction
2525 SDLoc DL(Op);
2526 SDValue Op0;
2527 SDValue Op1 = skipExtensionForVectorMULL(N1, DAG);
2528 if (!isMLA) {
2529 Op0 = skipExtensionForVectorMULL(N0, DAG);
2530 assert(Op0.getValueType().is64BitVector() &&
2531 Op1.getValueType().is64BitVector() &&
2532 "unexpected types for extended operands to VMULL");
2533 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
2534 }
2535 // Optimize (zext A + zext B) * C to (S/UMULL A, C) + (S/UMULL B, C) during
2536 // isel lowering to take advantage of no-stall back-to-back s/umul + s/umla.
2537 // This holds for CPUs with accumulate forwarding such as Cortex-A53/A57.
2538 SDValue N00 = skipExtensionForVectorMULL(N0->getOperand(0).getNode(), DAG);
2539 SDValue N01 = skipExtensionForVectorMULL(N0->getOperand(1).getNode(), DAG);
2540 EVT Op1VT = Op1.getValueType();
2541 return DAG.getNode(N0->getOpcode(), DL, VT,
2542 DAG.getNode(NewOpc, DL, VT,
2543 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
2544 DAG.getNode(NewOpc, DL, VT,
2545 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
2546}
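// Editor's illustrative note (not part of the original file): the lowering
// above is what lets a widening multiply such as
//   %a = sext <2 x i32> %x to <2 x i64>
//   %b = sext <2 x i32> %y to <2 x i64>
//   %m = mul <2 x i64> %a, %b
// be selected as a single "smull v0.2d, v1.2s, v2.2s" instead of being
// expanded, since a plain v2i64 multiply is not legal on AArch64.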
2547
2548SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
2549 SelectionDAG &DAG) const {
2550 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
2551 SDLoc dl(Op);
2552 switch (IntNo) {
2553 default: return SDValue(); // Don't custom lower most intrinsics.
2554 case Intrinsic::thread_pointer: {
2555 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2556 return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
2557 }
2558 case Intrinsic::aarch64_neon_abs:
2559 return DAG.getNode(ISD::ABS, dl, Op.getValueType(),
2560 Op.getOperand(1));
2561 case Intrinsic::aarch64_neon_smax:
2562 return DAG.getNode(ISD::SMAX, dl, Op.getValueType(),
2563 Op.getOperand(1), Op.getOperand(2));
2564 case Intrinsic::aarch64_neon_umax:
2565 return DAG.getNode(ISD::UMAX, dl, Op.getValueType(),
2566 Op.getOperand(1), Op.getOperand(2));
2567 case Intrinsic::aarch64_neon_smin:
2568 return DAG.getNode(ISD::SMIN, dl, Op.getValueType(),
2569 Op.getOperand(1), Op.getOperand(2));
2570 case Intrinsic::aarch64_neon_umin:
2571 return DAG.getNode(ISD::UMIN, dl, Op.getValueType(),
2572 Op.getOperand(1), Op.getOperand(2));
2573 }
2574}
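// Editor's illustrative note (not part of the original file): with the mapping
// above, an intrinsic call such as
//   %r = call <4 x i32> @llvm.aarch64.neon.smax.v4i32(<4 x i32> %a, <4 x i32> %b)
// is re-expressed as a generic ISD::SMAX node, so target-independent DAG
// combines for SMAX/UMAX/SMIN/UMIN also apply to the NEON intrinsics.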
2575
2576SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
2577 SelectionDAG &DAG) const {
2578 DEBUG(dbgs() << "Custom lowering: ")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-lower")) { dbgs() << "Custom lowering: "; } }
while (false)
;
2579 DEBUG(Op.dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("aarch64-lower")) { Op.dump(); } } while (false)
;
2580
2581 switch (Op.getOpcode()) {
2582 default:
2583 llvm_unreachable("unimplemented operand")::llvm::llvm_unreachable_internal("unimplemented operand", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2583)
;
2584 return SDValue();
2585 case ISD::BITCAST:
2586 return LowerBITCAST(Op, DAG);
2587 case ISD::GlobalAddress:
2588 return LowerGlobalAddress(Op, DAG);
2589 case ISD::GlobalTLSAddress:
2590 return LowerGlobalTLSAddress(Op, DAG);
2591 case ISD::SETCC:
2592 return LowerSETCC(Op, DAG);
2593 case ISD::BR_CC:
2594 return LowerBR_CC(Op, DAG);
2595 case ISD::SELECT:
2596 return LowerSELECT(Op, DAG);
2597 case ISD::SELECT_CC:
2598 return LowerSELECT_CC(Op, DAG);
2599 case ISD::JumpTable:
2600 return LowerJumpTable(Op, DAG);
2601 case ISD::ConstantPool:
2602 return LowerConstantPool(Op, DAG);
2603 case ISD::BlockAddress:
2604 return LowerBlockAddress(Op, DAG);
2605 case ISD::VASTART:
2606 return LowerVASTART(Op, DAG);
2607 case ISD::VACOPY:
2608 return LowerVACOPY(Op, DAG);
2609 case ISD::VAARG:
2610 return LowerVAARG(Op, DAG);
2611 case ISD::ADDC:
2612 case ISD::ADDE:
2613 case ISD::SUBC:
2614 case ISD::SUBE:
2615 return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
2616 case ISD::SADDO:
2617 case ISD::UADDO:
2618 case ISD::SSUBO:
2619 case ISD::USUBO:
2620 case ISD::SMULO:
2621 case ISD::UMULO:
2622 return LowerXALUO(Op, DAG);
2623 case ISD::FADD:
2624 return LowerF128Call(Op, DAG, RTLIB::ADD_F128);
2625 case ISD::FSUB:
2626 return LowerF128Call(Op, DAG, RTLIB::SUB_F128);
2627 case ISD::FMUL:
2628 return LowerF128Call(Op, DAG, RTLIB::MUL_F128);
2629 case ISD::FDIV:
2630 return LowerF128Call(Op, DAG, RTLIB::DIV_F128);
2631 case ISD::FP_ROUND:
2632 return LowerFP_ROUND(Op, DAG);
2633 case ISD::FP_EXTEND:
2634 return LowerFP_EXTEND(Op, DAG);
2635 case ISD::FRAMEADDR:
2636 return LowerFRAMEADDR(Op, DAG);
2637 case ISD::RETURNADDR:
2638 return LowerRETURNADDR(Op, DAG);
2639 case ISD::INSERT_VECTOR_ELT:
2640 return LowerINSERT_VECTOR_ELT(Op, DAG);
2641 case ISD::EXTRACT_VECTOR_ELT:
2642 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2643 case ISD::BUILD_VECTOR:
2644 return LowerBUILD_VECTOR(Op, DAG);
2645 case ISD::VECTOR_SHUFFLE:
2646 return LowerVECTOR_SHUFFLE(Op, DAG);
2647 case ISD::EXTRACT_SUBVECTOR:
2648 return LowerEXTRACT_SUBVECTOR(Op, DAG);
2649 case ISD::SRA:
2650 case ISD::SRL:
2651 case ISD::SHL:
2652 return LowerVectorSRA_SRL_SHL(Op, DAG);
2653 case ISD::SHL_PARTS:
2654 return LowerShiftLeftParts(Op, DAG);
2655 case ISD::SRL_PARTS:
2656 case ISD::SRA_PARTS:
2657 return LowerShiftRightParts(Op, DAG);
2658 case ISD::CTPOP:
2659 return LowerCTPOP(Op, DAG);
2660 case ISD::FCOPYSIGN:
2661 return LowerFCOPYSIGN(Op, DAG);
2662 case ISD::AND:
2663 return LowerVectorAND(Op, DAG);
2664 case ISD::OR:
2665 return LowerVectorOR(Op, DAG);
2666 case ISD::XOR:
2667 return LowerXOR(Op, DAG);
2668 case ISD::PREFETCH:
2669 return LowerPREFETCH(Op, DAG);
2670 case ISD::SINT_TO_FP:
2671 case ISD::UINT_TO_FP:
2672 return LowerINT_TO_FP(Op, DAG);
2673 case ISD::FP_TO_SINT:
2674 case ISD::FP_TO_UINT:
2675 return LowerFP_TO_INT(Op, DAG);
2676 case ISD::FSINCOS:
2677 return LowerFSINCOS(Op, DAG);
2678 case ISD::MUL:
2679 return LowerMUL(Op, DAG);
2680 case ISD::INTRINSIC_WO_CHAIN:
2681 return LowerINTRINSIC_WO_CHAIN(Op, DAG);
2682 case ISD::VECREDUCE_ADD:
2683 case ISD::VECREDUCE_SMAX:
2684 case ISD::VECREDUCE_SMIN:
2685 case ISD::VECREDUCE_UMAX:
2686 case ISD::VECREDUCE_UMIN:
2687 case ISD::VECREDUCE_FMAX:
2688 case ISD::VECREDUCE_FMIN:
2689 return LowerVECREDUCE(Op, DAG);
2690 case ISD::ATOMIC_LOAD_SUB:
2691 return LowerATOMIC_LOAD_SUB(Op, DAG);
2692 case ISD::ATOMIC_LOAD_AND:
2693 return LowerATOMIC_LOAD_AND(Op, DAG);
2694 case ISD::DYNAMIC_STACKALLOC:
2695 return LowerDYNAMIC_STACKALLOC(Op, DAG);
2696 }
2697}
2698
2699//===----------------------------------------------------------------------===//
2700// Calling Convention Implementation
2701//===----------------------------------------------------------------------===//
2702
2703#include "AArch64GenCallingConv.inc"
2704
2705/// Selects the correct CCAssignFn for a given CallingConvention value.
2706CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
2707 bool IsVarArg) const {
2708 switch (CC) {
2709 default:
2710 report_fatal_error("Unsupported calling convention.");
2711 case CallingConv::WebKit_JS:
2712 return CC_AArch64_WebKit_JS;
2713 case CallingConv::GHC:
2714 return CC_AArch64_GHC;
2715 case CallingConv::C:
2716 case CallingConv::Fast:
2717 case CallingConv::PreserveMost:
2718 case CallingConv::CXX_FAST_TLS:
2719 case CallingConv::Swift:
2720 if (Subtarget->isTargetWindows() && IsVarArg)
2721 return CC_AArch64_Win64_VarArg;
2722 if (!Subtarget->isTargetDarwin())
2723 return CC_AArch64_AAPCS;
2724 return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
2725 case CallingConv::Win64:
2726 return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
2727 }
2728}
2729
2730CCAssignFn *
2731AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
2732 return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
2733 : RetCC_AArch64_AAPCS;
2734}
2735
2736SDValue AArch64TargetLowering::LowerFormalArguments(
2737 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
2738 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
2739 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
2740 MachineFunction &MF = DAG.getMachineFunction();
2741 MachineFrameInfo &MFI = MF.getFrameInfo();
2742 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
2743
2744 // Assign locations to all of the incoming arguments.
2745 SmallVector<CCValAssign, 16> ArgLocs;
2746 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
2747 *DAG.getContext());
2748
2749 // At this point, Ins[].VT may already be promoted to i32. To correctly
2750 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
2751 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
2752 // Since AnalyzeFormalArguments uses Ins[].VT for both ValVT and LocVT, here
2753 // we use a special version of AnalyzeFormalArguments to pass in ValVT and
2754 // LocVT.
2755 unsigned NumArgs = Ins.size();
2756 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
2757 unsigned CurArgIdx = 0;
2758 for (unsigned i = 0; i != NumArgs; ++i) {
2759 MVT ValVT = Ins[i].VT;
2760 if (Ins[i].isOrigArg()) {
2761 std::advance(CurOrigArg, Ins[i].getOrigArgIndex() - CurArgIdx);
2762 CurArgIdx = Ins[i].getOrigArgIndex();
2763
2764 // Get type of the original argument.
2765 EVT ActualVT = getValueType(DAG.getDataLayout(), CurOrigArg->getType(),
2766 /*AllowUnknown*/ true);
2767 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : MVT::Other;
2768 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
2769 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
2770 ValVT = MVT::i8;
2771 else if (ActualMVT == MVT::i16)
2772 ValVT = MVT::i16;
2773 }
2774 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
2775 bool Res =
2776 AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
2777 assert(!Res && "Call operand has unhandled type");
2778 (void)Res;
2779 }
2780 assert(ArgLocs.size() == Ins.size());
2781 SmallVector<SDValue, 16> ArgValues;
2782 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
2783 CCValAssign &VA = ArgLocs[i];
2784
2785 if (Ins[i].Flags.isByVal()) {
2786 // Byval is used for HFAs in the PCS, but the system should work in a
2787 // non-compliant manner for larger structs.
2788 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2789 int Size = Ins[i].Flags.getByValSize();
2790 unsigned NumRegs = (Size + 7) / 8;
2791
2792 // FIXME: This works on big-endian for composite byvals, which are the common
2793 // case. It should also work for fundamental types.
2794 unsigned FrameIdx =
2795 MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
2796 SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
2797 InVals.push_back(FrameIdxN);
2798
2799 continue;
2800 }
2801
2802 if (VA.isRegLoc()) {
2803 // Arguments stored in registers.
2804 EVT RegVT = VA.getLocVT();
2805
2806 SDValue ArgValue;
2807 const TargetRegisterClass *RC;
2808
2809 if (RegVT == MVT::i32)
2810 RC = &AArch64::GPR32RegClass;
2811 else if (RegVT == MVT::i64)
2812 RC = &AArch64::GPR64RegClass;
2813 else if (RegVT == MVT::f16)
2814 RC = &AArch64::FPR16RegClass;
2815 else if (RegVT == MVT::f32)
2816 RC = &AArch64::FPR32RegClass;
2817 else if (RegVT == MVT::f64 || RegVT.is64BitVector())
2818 RC = &AArch64::FPR64RegClass;
2819 else if (RegVT == MVT::f128 || RegVT.is128BitVector())
2820 RC = &AArch64::FPR128RegClass;
2821 else
2822 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering")::llvm::llvm_unreachable_internal("RegVT not supported by FORMAL_ARGUMENTS Lowering"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2822)
;
2823
2824 // Transform the arguments in physical registers into virtual ones.
2825 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
2826 ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegVT);
2827
2828 // If this is an 8, 16 or 32-bit value, it is really passed promoted
2829 // to 64 bits. Insert an assert[sz]ext to capture this, then
2830 // truncate to the right size.
2831 switch (VA.getLocInfo()) {
2832 default:
2833 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 2833)
;
2834 case CCValAssign::Full:
2835 break;
2836 case CCValAssign::BCvt:
2837 ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
2838 break;
2839 case CCValAssign::AExt:
2840 case CCValAssign::SExt:
2841 case CCValAssign::ZExt:
2842 // SelectionDAGBuilder will insert appropriate AssertZExt & AssertSExt
2843 // nodes after our lowering.
2844 assert(RegVT == Ins[i].VT && "incorrect register location selected");
2845 break;
2846 }
2847
2848 InVals.push_back(ArgValue);
2849
2850 } else { // VA.isRegLoc()
2851 assert(VA.isMemLoc() && "CCValAssign is neither reg nor mem");
2852 unsigned ArgOffset = VA.getLocMemOffset();
2853 unsigned ArgSize = VA.getValVT().getSizeInBits() / 8;
2854
2855 uint32_t BEAlign = 0;
2856 if (!Subtarget->isLittleEndian() && ArgSize < 8 &&
2857 !Ins[i].Flags.isInConsecutiveRegs())
2858 BEAlign = 8 - ArgSize;
2859
2860 int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
2861
2862 // Create load nodes to retrieve arguments from the stack.
2863 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2864 SDValue ArgValue;
2865
2866 // For NON_EXTLOAD, generic code in getLoad asserts that ValVT == MemVT.
2867 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
2868 MVT MemVT = VA.getValVT();
2869
2870 switch (VA.getLocInfo()) {
2871 default:
2872 break;
2873 case CCValAssign::BCvt:
2874 MemVT = VA.getLocVT();
2875 break;
2876 case CCValAssign::SExt:
2877 ExtType = ISD::SEXTLOAD;
2878 break;
2879 case CCValAssign::ZExt:
2880 ExtType = ISD::ZEXTLOAD;
2881 break;
2882 case CCValAssign::AExt:
2883 ExtType = ISD::EXTLOAD;
2884 break;
2885 }
2886
2887 ArgValue = DAG.getExtLoad(
2888 ExtType, DL, VA.getLocVT(), Chain, FIN,
2889 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
2890 MemVT);
2891
2892 InVals.push_back(ArgValue);
2893 }
2894 }
2895
2896 // varargs
2897 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2898 if (isVarArg) {
2899 if (!Subtarget->isTargetDarwin() || IsWin64) {
2900 // The AAPCS variadic function ABI is identical to the non-variadic
2901 // one. As a result there may be more arguments in registers and we should
2902 // save them for future reference.
2903 // Win64 variadic functions also pass arguments in registers, but all float
2904 // arguments are passed in integer registers.
2905 saveVarArgRegisters(CCInfo, DAG, DL, Chain);
2906 }
2907
2908 // This will point to the next argument passed via stack.
2909 unsigned StackOffset = CCInfo.getNextStackOffset();
2910 // We currently pass all varargs at 8-byte alignment.
2911 StackOffset = ((StackOffset + 7) & ~7);
2912 FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
2913 }
2914
2915 unsigned StackArgSize = CCInfo.getNextStackOffset();
2916 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
2917 if (DoesCalleeRestoreStack(CallConv, TailCallOpt)) {
2918 // This is a non-standard ABI so by fiat I say we're allowed to make full
2919 // use of the stack area to be popped, which must be aligned to 16 bytes in
2920 // any case:
2921 StackArgSize = alignTo(StackArgSize, 16);
2922
2923 // If we're expected to restore the stack (e.g. fastcc) then we'll be adding
2924 // a multiple of 16.
2925 FuncInfo->setArgumentStackToRestore(StackArgSize);
2926
2927 // This realignment carries over to the available bytes below. Our own
2928 // callers will guarantee the space is free by giving an aligned value to
2929 // CALLSEQ_START.
2930 }
2931 // Even if we're not expected to free up the space, it's useful to know how
2932 // much is there while considering tail calls (because we can reuse it).
2933 FuncInfo->setBytesInStackArgArea(StackArgSize);
2934
2935 return Chain;
2936}
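// Editor's illustrative sketch (not part of the original file; helper name is
// the editor's): the varargs handling above rounds the next stack offset up to
// 8-byte alignment before recording the VarArgsStackIndex, i.e.
static unsigned roundUpTo8(unsigned StackOffset) {
  return (StackOffset + 7) & ~7u;   // e.g. 12 -> 16, 16 -> 16
}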
2937
2938void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
2939 SelectionDAG &DAG,
2940 const SDLoc &DL,
2941 SDValue &Chain) const {
2942 MachineFunction &MF = DAG.getMachineFunction();
2943 MachineFrameInfo &MFI = MF.getFrameInfo();
2944 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
2945 auto PtrVT = getPointerTy(DAG.getDataLayout());
2946 bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
2947
2948 SmallVector<SDValue, 8> MemOps;
2949
2950 static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
2951 AArch64::X3, AArch64::X4, AArch64::X5,
2952 AArch64::X6, AArch64::X7 };
2953 static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
2954 unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
2955
2956 unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
2957 int GPRIdx = 0;
2958 if (GPRSaveSize != 0) {
2959 if (IsWin64) {
2960 GPRIdx = MFI.CreateFixedObject(GPRSaveSize, -(int)GPRSaveSize, false);
2961 if (GPRSaveSize & 15)
2962 // The extra size here, if triggered, will always be 8.
2963 MFI.CreateFixedObject(16 - (GPRSaveSize & 15), -(int)alignTo(GPRSaveSize, 16), false);
2964 } else
2965 GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
2966
2967 SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
2968
2969 for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
2970 unsigned VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
2971 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
2972 SDValue Store = DAG.getStore(
2973 Val.getValue(1), DL, Val, FIN,
2974 IsWin64
2975 ? MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
2976 GPRIdx,
2977 (i - FirstVariadicGPR) * 8)
2978 : MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 8));
2979 MemOps.push_back(Store);
2980 FIN =
2981 DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getConstant(8, DL, PtrVT));
2982 }
2983 }
2984 FuncInfo->setVarArgsGPRIndex(GPRIdx);
2985 FuncInfo->setVarArgsGPRSize(GPRSaveSize);
2986
2987 if (Subtarget->hasFPARMv8() && !IsWin64) {
2988 static const MCPhysReg FPRArgRegs[] = {
2989 AArch64::Q0, AArch64::Q1, AArch64::Q2, AArch64::Q3,
2990 AArch64::Q4, AArch64::Q5, AArch64::Q6, AArch64::Q7};
2991 static const unsigned NumFPRArgRegs = array_lengthof(FPRArgRegs);
2992 unsigned FirstVariadicFPR = CCInfo.getFirstUnallocated(FPRArgRegs);
2993
2994 unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
2995 int FPRIdx = 0;
2996 if (FPRSaveSize != 0) {
2997 FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
2998
2999 SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
3000
3001 for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) {
3002 unsigned VReg = MF.addLiveIn(FPRArgRegs[i], &AArch64::FPR128RegClass);
3003 SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128);
3004
3005 SDValue Store = DAG.getStore(
3006 Val.getValue(1), DL, Val, FIN,
3007 MachinePointerInfo::getStack(DAG.getMachineFunction(), i * 16));
3008 MemOps.push_back(Store);
3009 FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
3010 DAG.getConstant(16, DL, PtrVT));
3011 }
3012 }
3013 FuncInfo->setVarArgsFPRIndex(FPRIdx);
3014 FuncInfo->setVarArgsFPRSize(FPRSaveSize);
3015 }
3016
3017 if (!MemOps.empty()) {
3018 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3019 }
3020}
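// Editor's illustrative sketch (not part of the original file; helper name is
// the editor's): the GPR save area sized above covers the variadic registers
// x0..x7 that were not consumed by fixed arguments. For "int f(int a, ...)"
// only x0 is fixed, so FirstVariadicGPR is 1 and x1..x7 (56 bytes) are spilled.
static unsigned gprSaveAreaSize(unsigned FirstVariadicGPR) {
  const unsigned NumGPRArgRegs = 8;   // x0..x7
  return 8 * (NumGPRArgRegs - FirstVariadicGPR);
}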
3021
3022/// LowerCallResult - Lower the result values of a call into the
3023/// appropriate copies out of appropriate physical registers.
3024SDValue AArch64TargetLowering::LowerCallResult(
3025 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3026 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
3027 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
3028 SDValue ThisVal) const {
3029 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3030 ? RetCC_AArch64_WebKit_JS
3031 : RetCC_AArch64_AAPCS;
3032 // Assign locations to each value returned by this call.
3033 SmallVector<CCValAssign, 16> RVLocs;
3034 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3035 *DAG.getContext());
3036 CCInfo.AnalyzeCallResult(Ins, RetCC);
3037
3038 // Copy all of the result registers out of their specified physreg.
3039 for (unsigned i = 0; i != RVLocs.size(); ++i) {
3040 CCValAssign VA = RVLocs[i];
3041
3042 // Pass 'this' value directly from the argument to return value, to avoid
3043 // reg unit interference
3044 if (i == 0 && isThisReturn) {
3045 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
3046 "unexpected return calling convention register assignment");
3047 InVals.push_back(ThisVal);
3048 continue;
3049 }
3050
3051 SDValue Val =
3052 DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), InFlag);
3053 Chain = Val.getValue(1);
3054 InFlag = Val.getValue(2);
3055
3056 switch (VA.getLocInfo()) {
3057 default:
3058 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3058)
;
3059 case CCValAssign::Full:
3060 break;
3061 case CCValAssign::BCvt:
3062 Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
3063 break;
3064 }
3065
3066 InVals.push_back(Val);
3067 }
3068
3069 return Chain;
3070}
3071
3072/// Return true if the calling convention is one that we can guarantee TCO for.
3073static bool canGuaranteeTCO(CallingConv::ID CC) {
3074 return CC == CallingConv::Fast;
3075}
3076
3077/// Return true if we might ever do TCO for calls with this calling convention.
3078static bool mayTailCallThisCC(CallingConv::ID CC) {
3079 switch (CC) {
3080 case CallingConv::C:
3081 case CallingConv::PreserveMost:
3082 case CallingConv::Swift:
3083 return true;
3084 default:
3085 return canGuaranteeTCO(CC);
3086 }
3087}
3088
3089bool AArch64TargetLowering::isEligibleForTailCallOptimization(
3090 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
3091 const SmallVectorImpl<ISD::OutputArg> &Outs,
3092 const SmallVectorImpl<SDValue> &OutVals,
3093 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
3094 if (!mayTailCallThisCC(CalleeCC))
3095 return false;
3096
3097 MachineFunction &MF = DAG.getMachineFunction();
3098 const Function &CallerF = MF.getFunction();
3099 CallingConv::ID CallerCC = CallerF.getCallingConv();
3100 bool CCMatch = CallerCC == CalleeCC;
3101
3102 // Byval parameters hand the function a pointer directly into the stack area
3103 // we want to reuse during a tail call. Working around this *is* possible (see
3104 // X86) but less efficient and uglier in LowerCall.
3105 for (Function::const_arg_iterator i = CallerF.arg_begin(),
3106 e = CallerF.arg_end();
3107 i != e; ++i)
3108 if (i->hasByValAttr())
3109 return false;
3110
3111 if (getTargetMachine().Options.GuaranteedTailCallOpt)
3112 return canGuaranteeTCO(CalleeCC) && CCMatch;
3113
3114 // Externally-defined functions with weak linkage should not be
3115 // tail-called on AArch64 when the OS does not support dynamic
3116 // pre-emption of symbols, as the AAELF spec requires normal calls
3117 // to undefined weak functions to be replaced with a NOP or jump to the
3118 // next instruction. The behaviour of branch instructions in this
3119 // situation (as used for tail calls) is implementation-defined, so we
3120 // cannot rely on the linker replacing the tail call with a return.
3121 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3122 const GlobalValue *GV = G->getGlobal();
3123 const Triple &TT = getTargetMachine().getTargetTriple();
3124 if (GV->hasExternalWeakLinkage() &&
3125 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
3126 return false;
3127 }
3128
3129 // Now we search for cases where we can use a tail call without changing the
3130 // ABI. Sibcall is used in some places (particularly gcc) to refer to this
3131 // concept.
3132
3133 // I want anyone implementing a new calling convention to think long and hard
3134 // about this assert.
3135 assert((!isVarArg || CalleeCC == CallingConv::C) &&
3136 "Unexpected variadic calling convention");
3137
3138 LLVMContext &C = *DAG.getContext();
3139 if (isVarArg && !Outs.empty()) {
3140 // At least two cases here: if caller is fastcc then we can't have any
3141 // memory arguments (we'd be expected to clean up the stack afterwards). If
3142 // caller is C then we could potentially use its argument area.
3143
3144 // FIXME: for now we take the most conservative of these in both cases:
3145 // disallow all variadic memory operands.
3146 SmallVector<CCValAssign, 16> ArgLocs;
3147 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3148
3149 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true));
3150 for (const CCValAssign &ArgLoc : ArgLocs)
3151 if (!ArgLoc.isRegLoc())
3152 return false;
3153 }
3154
3155 // Check that the call results are passed in the same way.
3156 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3157 CCAssignFnForCall(CalleeCC, isVarArg),
3158 CCAssignFnForCall(CallerCC, isVarArg)))
3159 return false;
3160 // The callee has to preserve all registers the caller needs to preserve.
3161 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3162 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3163 if (!CCMatch) {
3164 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3165 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3166 return false;
3167 }
3168
3169 // Nothing more to check if the callee is taking no arguments
3170 if (Outs.empty())
3171 return true;
3172
3173 SmallVector<CCValAssign, 16> ArgLocs;
3174 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
3175
3176 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
3177
3178 const AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3179
3180 // If the stack arguments for this call do not fit into our own save area then
3181 // the call cannot be made tail.
3182 if (CCInfo.getNextStackOffset() > FuncInfo->getBytesInStackArgArea())
3183 return false;
3184
3185 const MachineRegisterInfo &MRI = MF.getRegInfo();
3186 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3187 return false;
3188
3189 return true;
3190}
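// Editor's illustrative note (not part of the original file): a simple case
// the checks above accept is a same-convention forwarding call whose arguments
// all fit in registers, e.g.
//   int callee(int);
//   int caller(int x) { return callee(x + 1); }   // can be emitted as "b callee"
// whereas a byval argument in the caller, mismatched preserved-register masks,
// or needing more stack-argument space than the caller's own incoming area all
// make the call ineligible.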
3191
3192SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
3193 SelectionDAG &DAG,
3194 MachineFrameInfo &MFI,
3195 int ClobberedFI) const {
3196 SmallVector<SDValue, 8> ArgChains;
3197 int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
3198 int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
3199
3200 // Include the original chain at the beginning of the list. When this is
3201 // used by target LowerCall hooks, this helps legalize find the
3202 // CALLSEQ_BEGIN node.
3203 ArgChains.push_back(Chain);
3204
3205 // Add a chain value for each stack argument load that overlaps the clobbered object.
3206 for (SDNode::use_iterator U = DAG.getEntryNode().getNode()->use_begin(),
3207 UE = DAG.getEntryNode().getNode()->use_end();
3208 U != UE; ++U)
3209 if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
3210 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
3211 if (FI->getIndex() < 0) {
3212 int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
3213 int64_t InLastByte = InFirstByte;
3214 InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
3215
3216 if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
3217 (FirstByte <= InFirstByte && InFirstByte <= LastByte))
3218 ArgChains.push_back(SDValue(L, 1));
3219 }
3220
3221 // Build a tokenfactor for all the chains.
3222 return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains);
3223}
3224
3225bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
3226 bool TailCallOpt) const {
3227 return CallCC == CallingConv::Fast && TailCallOpt;
3228}
3229
3230/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
3231/// and add input and output parameter nodes.
3232SDValue
3233AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
3234 SmallVectorImpl<SDValue> &InVals) const {
3235 SelectionDAG &DAG = CLI.DAG;
3236 SDLoc &DL = CLI.DL;
3237 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
3238 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
3239 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
3240 SDValue Chain = CLI.Chain;
3241 SDValue Callee = CLI.Callee;
3242 bool &IsTailCall = CLI.IsTailCall;
3243 CallingConv::ID CallConv = CLI.CallConv;
3244 bool IsVarArg = CLI.IsVarArg;
3245
3246 MachineFunction &MF = DAG.getMachineFunction();
3247 bool IsThisReturn = false;
3248
3249 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
3250 bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt;
3251 bool IsSibCall = false;
3252
3253 if (IsTailCall) {
3254 // Check if it's really possible to do a tail call.
3255 IsTailCall = isEligibleForTailCallOptimization(
3256 Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
3257 if (!IsTailCall && CLI.CS && CLI.CS.isMustTailCall())
3258 report_fatal_error("failed to perform tail call elimination on a call "
3259 "site marked musttail");
3260
3261 // A sibling call is one where we're under the usual C ABI and not planning
3262 // to change that but can still do a tail call:
3263 if (!TailCallOpt && IsTailCall)
3264 IsSibCall = true;
3265
3266 if (IsTailCall)
3267 ++NumTailCalls;
3268 }
3269
3270 // Analyze operands of the call, assigning locations to each operand.
3271 SmallVector<CCValAssign, 16> ArgLocs;
3272 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
3273 *DAG.getContext());
3274
3275 if (IsVarArg) {
3276 // Handle fixed and variable vector arguments differently.
3277 // Variable vector arguments always go into memory.
3278 unsigned NumArgs = Outs.size();
3279
3280 for (unsigned i = 0; i != NumArgs; ++i) {
3281 MVT ArgVT = Outs[i].VT;
3282 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3283 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv,
3284 /*IsVarArg=*/ !Outs[i].IsFixed);
3285 bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
3286 assert(!Res && "Call operand has unhandled type");
3287 (void)Res;
3288 }
3289 } else {
3290 // At this point, Outs[].VT may already be promoted to i32. To correctly
3291 // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and
3292 // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT.
3293 // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here
3294 // we use a special version of AnalyzeCallOperands to pass in ValVT and
3295 // LocVT.
3296 unsigned NumArgs = Outs.size();
3297 for (unsigned i = 0; i != NumArgs; ++i) {
3298 MVT ValVT = Outs[i].VT;
3299 // Get type of the original argument.
3300 EVT ActualVT = getValueType(DAG.getDataLayout(),
3301 CLI.getArgs()[Outs[i].OrigArgIndex].Ty,
3302 /*AllowUnknown*/ true);
3303 MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT;
3304 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
3305 // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16.
3306 if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8)
3307 ValVT = MVT::i8;
3308 else if (ActualMVT == MVT::i16)
3309 ValVT = MVT::i16;
3310
3311 CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false);
3312 bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo);
3313 assert(!Res && "Call operand has unhandled type");
3314 (void)Res;
3315 }
3316 }
3317
3318 // Get a count of how many bytes are to be pushed on the stack.
3319 unsigned NumBytes = CCInfo.getNextStackOffset();
3320
3321 if (IsSibCall) {
3322 // Since we're not changing the ABI to make this a tail call, the memory
3323 // operands are already available in the caller's incoming argument space.
3324 NumBytes = 0;
3325 }
3326
3327 // FPDiff is the byte offset of the call's argument area from the callee's.
3328 // Stores to callee stack arguments will be placed in FixedStackSlots offset
3329 // by this amount for a tail call. In a sibling call it must be 0 because the
3330 // caller will deallocate the entire stack and the callee still expects its
3331 // arguments to begin at SP+0. Completely unused for non-tail calls.
3332 int FPDiff = 0;
3333
3334 if (IsTailCall && !IsSibCall) {
3335 unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
3336
3337 // Since callee will pop argument stack as a tail call, we must keep the
3338 // popped size 16-byte aligned.
3339 NumBytes = alignTo(NumBytes, 16);
3340
3341 // FPDiff will be negative if this tail call requires more space than we
3342 // would automatically have in our incoming argument space. Positive if we
3343 // can actually shrink the stack.
3344 FPDiff = NumReusableBytes - NumBytes;
3345
3346 // The stack pointer must be 16-byte aligned at all times it's used for a
3347 // memory operation, which in practice means at *all* times and in
3348 // particular across call boundaries. Therefore our own arguments started at
3349 // a 16-byte aligned SP and the delta applied for the tail call should
3350 // satisfy the same constraint.
3351 assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
3352 }
3353
3354 // Adjust the stack pointer for the new arguments...
3355 // These operations are automatically eliminated by the prolog/epilog pass
3356 if (!IsSibCall)
3357 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
3358
3359 SDValue StackPtr = DAG.getCopyFromReg(Chain, DL, AArch64::SP,
3360 getPointerTy(DAG.getDataLayout()));
3361
3362 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
3363 SmallVector<SDValue, 8> MemOpChains;
3364 auto PtrVT = getPointerTy(DAG.getDataLayout());
3365
3366 // Walk the register/memloc assignments, inserting copies/loads.
3367 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); i != e;
3368 ++i, ++realArgIdx) {
3369 CCValAssign &VA = ArgLocs[i];
3370 SDValue Arg = OutVals[realArgIdx];
3371 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
3372
3373 // Promote the value if needed.
3374 switch (VA.getLocInfo()) {
3375 default:
3376 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3376)
;
3377 case CCValAssign::Full:
3378 break;
3379 case CCValAssign::SExt:
3380 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
3381 break;
3382 case CCValAssign::ZExt:
3383 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3384 break;
3385 case CCValAssign::AExt:
3386 if (Outs[realArgIdx].ArgVT == MVT::i1) {
3387 // AAPCS requires i1 to be zero-extended to 8-bits by the caller.
3388 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3389 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i8, Arg);
3390 }
3391 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
3392 break;
3393 case CCValAssign::BCvt:
3394 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3395 break;
3396 case CCValAssign::FPExt:
3397 Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
3398 break;
3399 }
3400
3401 if (VA.isRegLoc()) {
3402 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
3403 Outs[0].VT == MVT::i64) {
3404 assert(VA.getLocVT() == MVT::i64 &&
3405 "unexpected calling convention register assignment");
3406 assert(!Ins.empty() && Ins[0].VT == MVT::i64 &&
3407 "unexpected use of 'returned'");
3408 IsThisReturn = true;
3409 }
3410 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
3411 } else {
3412 assert(VA.isMemLoc());
3413
3414 SDValue DstAddr;
3415 MachinePointerInfo DstInfo;
3416
3417 // FIXME: This works on big-endian for composite byvals, which are the
3418 // common case. It should also work for fundamental types.
3419 uint32_t BEAlign = 0;
3420 unsigned OpSize = Flags.isByVal() ? Flags.getByValSize() * 8
3421 : VA.getValVT().getSizeInBits();
3422 OpSize = (OpSize + 7) / 8;
3423 if (!Subtarget->isLittleEndian() && !Flags.isByVal() &&
3424 !Flags.isInConsecutiveRegs()) {
3425 if (OpSize < 8)
3426 BEAlign = 8 - OpSize;
3427 }
3428 unsigned LocMemOffset = VA.getLocMemOffset();
3429 int32_t Offset = LocMemOffset + BEAlign;
3430 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3431 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3432
3433 if (IsTailCall) {
3434 Offset = Offset + FPDiff;
3435 int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
3436
3437 DstAddr = DAG.getFrameIndex(FI, PtrVT);
3438 DstInfo =
3439 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);
3440
3441 // Make sure any stack arguments overlapping with where we're storing
3442 // are loaded before this eventual operation. Otherwise they'll be
3443 // clobbered.
3444 Chain = addTokenForArgument(Chain, DAG, MF.getFrameInfo(), FI);
3445 } else {
3446 SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
3447
3448 DstAddr = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
3449 DstInfo = MachinePointerInfo::getStack(DAG.getMachineFunction(),
3450 LocMemOffset);
3451 }
3452
3453 if (Outs[i].Flags.isByVal()) {
3454 SDValue SizeNode =
3455 DAG.getConstant(Outs[i].Flags.getByValSize(), DL, MVT::i64);
3456 SDValue Cpy = DAG.getMemcpy(
3457 Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(),
3458 /*isVol = */ false, /*AlwaysInline = */ false,
3459 /*isTailCall = */ false,
3460 DstInfo, MachinePointerInfo());
3461
3462 MemOpChains.push_back(Cpy);
3463 } else {
3464 // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already
3465 // promoted to a legal register type i32, we should truncate Arg back to
3466 // i1/i8/i16.
3467 if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 ||
3468 VA.getValVT() == MVT::i16)
3469 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
3470
3471 SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo);
3472 MemOpChains.push_back(Store);
3473 }
3474 }
3475 }
3476
3477 if (!MemOpChains.empty())
3478 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
3479
3480 // Build a sequence of copy-to-reg nodes chained together with token chain
3481 // and flag operands which copy the outgoing args into the appropriate regs.
3482 SDValue InFlag;
3483 for (auto &RegToPass : RegsToPass) {
3484 Chain = DAG.getCopyToReg(Chain, DL, RegToPass.first,
3485 RegToPass.second, InFlag);
3486 InFlag = Chain.getValue(1);
3487 }
3488
3489 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
3490 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
3491 // node so that legalize doesn't hack it.
3492 if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
3493 auto GV = G->getGlobal();
3494 if (Subtarget->classifyGlobalFunctionReference(GV, getTargetMachine()) ==
3495 AArch64II::MO_GOT) {
3496 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_GOT);
3497 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3498 } else if (Subtarget->isTargetCOFF() && GV->hasDLLImportStorageClass()) {
3499 assert(Subtarget->isTargetWindows() &&
3500 "Windows is the only supported COFF target");
3501 Callee = getGOT(G, DAG, AArch64II::MO_DLLIMPORT);
3502 } else {
3503 const GlobalValue *GV = G->getGlobal();
3504 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
3505 }
3506 } else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
3507 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
3508 Subtarget->isTargetMachO()) {
3509 const char *Sym = S->getSymbol();
3510 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, AArch64II::MO_GOT);
3511 Callee = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, Callee);
3512 } else {
3513 const char *Sym = S->getSymbol();
3514 Callee = DAG.getTargetExternalSymbol(Sym, PtrVT, 0);
3515 }
3516 }
3517
3518 // We don't usually want to end the call-sequence here because we would tidy
3519 // the frame up *after* the call, however in the ABI-changing tail-call case
3520 // we've carefully laid out the parameters so that when sp is reset they'll be
3521 // in the correct location.
3522 if (IsTailCall && !IsSibCall) {
3523 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3524 DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
3525 InFlag = Chain.getValue(1);
3526 }
3527
3528 std::vector<SDValue> Ops;
3529 Ops.push_back(Chain);
3530 Ops.push_back(Callee);
3531
3532 if (IsTailCall) {
3533 // Each tail call may have to adjust the stack by a different amount, so
3534 // this information must travel along with the operation for eventual
3535 // consumption by emitEpilogue.
3536 Ops.push_back(DAG.getTargetConstant(FPDiff, DL, MVT::i32));
3537 }
3538
3539 // Add argument registers to the end of the list so that they are known live
3540 // into the call.
3541 for (auto &RegToPass : RegsToPass)
3542 Ops.push_back(DAG.getRegister(RegToPass.first,
3543 RegToPass.second.getValueType()));
3544
3545 // Add a register mask operand representing the call-preserved registers.
3546 const uint32_t *Mask;
3547 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3548 if (IsThisReturn) {
3549 // For 'this' returns, use the X0-preserving mask if applicable
3550 Mask = TRI->getThisReturnPreservedMask(MF, CallConv);
3551 if (!Mask) {
3552 IsThisReturn = false;
3553 Mask = TRI->getCallPreservedMask(MF, CallConv);
3554 }
3555 } else
3556 Mask = TRI->getCallPreservedMask(MF, CallConv);
3557
3558 assert(Mask && "Missing call preserved mask for calling convention");
3559 Ops.push_back(DAG.getRegisterMask(Mask));
3560
3561 if (InFlag.getNode())
3562 Ops.push_back(InFlag);
3563
3564 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3565
3566 // If we're doing a tail call, use a TC_RETURN here rather than an
3567 // actual call instruction.
3568 if (IsTailCall) {
3569 MF.getFrameInfo().setHasTailCall();
3570 return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
3571 }
3572
3573 // Returns a chain and a flag for retval copy to use.
3574 Chain = DAG.getNode(AArch64ISD::CALL, DL, NodeTys, Ops);
3575 InFlag = Chain.getValue(1);
3576
3577 uint64_t CalleePopBytes =
3578 DoesCalleeRestoreStack(CallConv, TailCallOpt) ? alignTo(NumBytes, 16) : 0;
3579
3580 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
3581 DAG.getIntPtrConstant(CalleePopBytes, DL, true),
3582 InFlag, DL);
3583 if (!Ins.empty())
3584 InFlag = Chain.getValue(1);
3585
3586 // Handle result values, copying them out of physregs into vregs that we
3587 // return.
3588 return LowerCallResult(Chain, InFlag, CallConv, IsVarArg, Ins, DL, DAG,
3589 InVals, IsThisReturn,
3590 IsThisReturn ? OutVals[0] : SDValue());
3591}
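// Editor's illustrative note (not part of the original file): for a normal call
// the node sequence built above is CALLSEQ_START -> copies into argument
// registers -> AArch64ISD::CALL (a "bl") -> CALLSEQ_END -> copies out of result
// registers, while an eligible tail call instead ends in AArch64ISD::TC_RETURN,
// which is emitted as a plain branch to the callee.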
3592
3593bool AArch64TargetLowering::CanLowerReturn(
3594 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
3595 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
3596 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3597 ? RetCC_AArch64_WebKit_JS
3598 : RetCC_AArch64_AAPCS;
3599 SmallVector<CCValAssign, 16> RVLocs;
3600 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
3601 return CCInfo.CheckReturn(Outs, RetCC);
3602}
3603
3604SDValue
3605AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
3606 bool isVarArg,
3607 const SmallVectorImpl<ISD::OutputArg> &Outs,
3608 const SmallVectorImpl<SDValue> &OutVals,
3609 const SDLoc &DL, SelectionDAG &DAG) const {
3610 CCAssignFn *RetCC = CallConv == CallingConv::WebKit_JS
3611 ? RetCC_AArch64_WebKit_JS
3612 : RetCC_AArch64_AAPCS;
3613 SmallVector<CCValAssign, 16> RVLocs;
3614 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3615 *DAG.getContext());
3616 CCInfo.AnalyzeReturn(Outs, RetCC);
3617
3618 // Copy the result values into the output registers.
3619 SDValue Flag;
3620 SmallVector<SDValue, 4> RetOps(1, Chain);
3621 for (unsigned i = 0, realRVLocIdx = 0; i != RVLocs.size();
3622 ++i, ++realRVLocIdx) {
3623 CCValAssign &VA = RVLocs[i];
3624 assert(VA.isRegLoc() && "Can only return in registers!");
3625 SDValue Arg = OutVals[realRVLocIdx];
3626
3627 switch (VA.getLocInfo()) {
3628 default:
3629 llvm_unreachable("Unknown loc info!")::llvm::llvm_unreachable_internal("Unknown loc info!", "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3629)
;
3630 case CCValAssign::Full:
3631 if (Outs[i].ArgVT == MVT::i1) {
3632 // AAPCS requires i1 to be zero-extended to i8 by the producer of the
3633 // value. This is strictly redundant on Darwin (which uses "zeroext
3634 // i1"), but will be optimised out before ISel.
3635 Arg = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Arg);
3636 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
3637 }
3638 break;
3639 case CCValAssign::BCvt:
3640 Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
3641 break;
3642 }
3643
3644 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
3645 Flag = Chain.getValue(1);
3646 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
3647 }
3648 const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
3649 const MCPhysReg *I =
3650 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
3651 if (I) {
3652 for (; *I; ++I) {
3653 if (AArch64::GPR64RegClass.contains(*I))
3654 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
3655 else if (AArch64::FPR64RegClass.contains(*I))
3656 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
3657 else
3658 llvm_unreachable("Unexpected register class in CSRsViaCopy!")::llvm::llvm_unreachable_internal("Unexpected register class in CSRsViaCopy!"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3658)
;
3659 }
3660 }
3661
3662 RetOps[0] = Chain; // Update chain.
3663
3664 // Add the flag if we have it.
3665 if (Flag.getNode())
3666 RetOps.push_back(Flag);
3667
3668 return DAG.getNode(AArch64ISD::RET_FLAG, DL, MVT::Other, RetOps);
3669}
3670
3671//===----------------------------------------------------------------------===//
3672// Other Lowering Code
3673//===----------------------------------------------------------------------===//
3674
3675SDValue AArch64TargetLowering::getTargetNode(GlobalAddressSDNode *N, EVT Ty,
3676 SelectionDAG &DAG,
3677 unsigned Flag) const {
3678 return DAG.getTargetGlobalAddress(N->getGlobal(), SDLoc(N), Ty, 0, Flag);
3679}
3680
3681SDValue AArch64TargetLowering::getTargetNode(JumpTableSDNode *N, EVT Ty,
3682 SelectionDAG &DAG,
3683 unsigned Flag) const {
3684 return DAG.getTargetJumpTable(N->getIndex(), Ty, Flag);
3685}
3686
3687SDValue AArch64TargetLowering::getTargetNode(ConstantPoolSDNode *N, EVT Ty,
3688 SelectionDAG &DAG,
3689 unsigned Flag) const {
3690 return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlignment(),
3691 N->getOffset(), Flag);
3692}
3693
3694SDValue AArch64TargetLowering::getTargetNode(BlockAddressSDNode* N, EVT Ty,
3695 SelectionDAG &DAG,
3696 unsigned Flag) const {
3697 return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, 0, Flag);
3698}
3699
3700// (loadGOT sym)
3701template <class NodeTy>
3702SDValue AArch64TargetLowering::getGOT(NodeTy *N, SelectionDAG &DAG,
3703 unsigned Flags) const {
3704 DEBUG(dbgs() << "AArch64TargetLowering::getGOT\n");
3705 SDLoc DL(N);
3706 EVT Ty = getPointerTy(DAG.getDataLayout());
3707 SDValue GotAddr = getTargetNode(N, Ty, DAG, AArch64II::MO_GOT | Flags);
3708 // FIXME: Once remat is capable of dealing with instructions with register
3709 // operands, expand this into two nodes instead of using a wrapper node.
3710 return DAG.getNode(AArch64ISD::LOADgot, DL, Ty, GotAddr);
3711}
3712
3713// (wrapper %highest(sym), %higher(sym), %hi(sym), %lo(sym))
3714template <class NodeTy>
3715SDValue AArch64TargetLowering::getAddrLarge(NodeTy *N, SelectionDAG &DAG,
3716 unsigned Flags) const {
3717 DEBUG(dbgs() << "AArch64TargetLowering::getAddrLarge\n");
3718 SDLoc DL(N);
3719 EVT Ty = getPointerTy(DAG.getDataLayout());
3720 const unsigned char MO_NC = AArch64II::MO_NC;
3721 return DAG.getNode(
3722 AArch64ISD::WrapperLarge, DL, Ty,
3723 getTargetNode(N, Ty, DAG, AArch64II::MO_G3 | Flags),
3724 getTargetNode(N, Ty, DAG, AArch64II::MO_G2 | MO_NC | Flags),
3725 getTargetNode(N, Ty, DAG, AArch64II::MO_G1 | MO_NC | Flags),
3726 getTargetNode(N, Ty, DAG, AArch64II::MO_G0 | MO_NC | Flags));
3727}
3728
3729// (addlow (adrp %hi(sym)) %lo(sym))
3730template <class NodeTy>
3731SDValue AArch64TargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
3732 unsigned Flags) const {
3733 DEBUG(dbgs() << "AArch64TargetLowering::getAddr\n");
3734 SDLoc DL(N);
3735 EVT Ty = getPointerTy(DAG.getDataLayout());
3736 SDValue Hi = getTargetNode(N, Ty, DAG, AArch64II::MO_PAGE | Flags);
3737 SDValue Lo = getTargetNode(N, Ty, DAG,
3738 AArch64II::MO_PAGEOFF | AArch64II::MO_NC | Flags);
3739 SDValue ADRP = DAG.getNode(AArch64ISD::ADRP, DL, Ty, Hi);
3740 return DAG.getNode(AArch64ISD::ADDlow, DL, Ty, ADRP, Lo);
3741}
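
// Illustrative sketch (hypothetical symbol "g", not code from this file): with the
// small code model, the MO_PAGE/MO_PAGEOFF pair built by getAddr above is typically
// emitted as
//   adrp x8, g            // 4KiB page containing g (MO_PAGE)
//   add  x8, x8, :lo12:g  // offset within the page (MO_PAGEOFF, MO_NC: no overflow check)
// while getAddrLarge assembles the full 64-bit address from the four 16-bit chunks
// MO_G3..MO_G0, i.e. a movz/movk sequence.
namespace example_small_code_model {
int g;                                 // hypothetical global
int *address_of_g() { return &g; }     // exercises the global-address lowering above
} // namespace example_small_code_model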
3742
3743SDValue AArch64TargetLowering::LowerGlobalAddress(SDValue Op,
3744 SelectionDAG &DAG) const {
3745 GlobalAddressSDNode *GN = cast<GlobalAddressSDNode>(Op);
3746 const GlobalValue *GV = GN->getGlobal();
3747 const AArch64II::TOF TargetFlags =
3748 (GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
3749 : AArch64II::MO_NO_FLAG);
3750 unsigned char OpFlags =
3751 Subtarget->ClassifyGlobalReference(GV, getTargetMachine());
3752
3753 assert(cast<GlobalAddressSDNode>(Op)->getOffset() == 0 &&(static_cast <bool> (cast<GlobalAddressSDNode>(Op
)->getOffset() == 0 && "unexpected offset in global node"
) ? void (0) : __assert_fail ("cast<GlobalAddressSDNode>(Op)->getOffset() == 0 && \"unexpected offset in global node\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3754, __extension__ __PRETTY_FUNCTION__))
3754 "unexpected offset in global node")(static_cast <bool> (cast<GlobalAddressSDNode>(Op
)->getOffset() == 0 && "unexpected offset in global node"
) ? void (0) : __assert_fail ("cast<GlobalAddressSDNode>(Op)->getOffset() == 0 && \"unexpected offset in global node\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3754, __extension__ __PRETTY_FUNCTION__))
;
3755
3756 // This also catches the large code model case for Darwin.
3757 if ((OpFlags & AArch64II::MO_GOT) != 0) {
3758 return getGOT(GN, DAG, TargetFlags);
3759 }
3760
3761 SDValue Result;
3762 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
3763 Result = getAddrLarge(GN, DAG, TargetFlags);
3764 } else {
3765 Result = getAddr(GN, DAG, TargetFlags);
3766 }
3767 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3768 SDLoc DL(GN);
3769 if (GV->hasDLLImportStorageClass())
3770 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3771 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3772 return Result;
3773}
3774
3775/// \brief Convert a TLS address reference into the correct sequence of loads
3776/// and calls to compute the variable's address (for Darwin, currently) and
3777/// return an SDValue containing the final node.
3778
3779/// Darwin only has one TLS scheme which must be capable of dealing with the
3780/// fully general situation, in the worst case. This means:
3781/// + "extern __thread" declaration.
3782/// + Defined in a possibly unknown dynamic library.
3783///
3784/// The general system is that each __thread variable has a [3 x i64] descriptor
3785/// which contains information used by the runtime to calculate the address. The
3786/// only part of this the compiler needs to know about is the first xword, which
3787/// contains a function pointer that must be called with the address of the
3788/// entire descriptor in "x0".
3789///
3790/// Since this descriptor may be in a different unit, in general even the
3791/// descriptor must be accessed via an indirect load. The "ideal" code sequence
3792/// is:
3793/// adrp x0, _var@TLVPPAGE
3794/// ldr x0, [x0, _var@TLVPPAGEOFF] ; x0 now contains address of descriptor
3795/// ldr x1, [x0] ; x1 contains 1st entry of descriptor,
3796/// ; the function pointer
3797/// blr x1 ; Uses descriptor address in x0
3798/// ; Address of _var is now in x0.
3799///
3800/// If the address of _var's descriptor *is* known to the linker, then it can
3801/// change the first "ldr" instruction to an appropriate "add x0, x0, #imm" for
3802/// a slight efficiency gain.
3803SDValue
3804AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
3805 SelectionDAG &DAG) const {
3806 assert(Subtarget->isTargetDarwin() &&
3807 "This function expects a Darwin target");
3808
3809 SDLoc DL(Op);
3810 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3811 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3812
3813 SDValue TLVPAddr =
3814 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3815 SDValue DescAddr = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TLVPAddr);
3816
3817 // The first entry in the descriptor is a function pointer that we must call
3818 // to obtain the address of the variable.
3819 SDValue Chain = DAG.getEntryNode();
3820 SDValue FuncTLVGet = DAG.getLoad(
3821 MVT::i64, DL, Chain, DescAddr,
3822 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
3823 /* Alignment = */ 8,
3824 MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
3825 MachineMemOperand::MODereferenceable);
3826 Chain = FuncTLVGet.getValue(1);
3827
3828 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
3829 MFI.setAdjustsStack(true);
3830
3831 // TLS calls preserve all registers except those that absolutely must be
3832 // trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
3833 // silly).
3834 const uint32_t *Mask =
3835 Subtarget->getRegisterInfo()->getTLSCallPreservedMask();
3836
3837 // Finally, we can make the call. This is just a degenerate version of a
3838 // normal AArch64 call node: x0 takes the address of the descriptor, and
3839 // returns the address of the variable in this thread.
3840 Chain = DAG.getCopyToReg(Chain, DL, AArch64::X0, DescAddr, SDValue());
3841 Chain =
3842 DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
3843 Chain, FuncTLVGet, DAG.getRegister(AArch64::X0, MVT::i64),
3844 DAG.getRegisterMask(Mask), Chain.getValue(1));
3845 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Chain.getValue(1));
3846}
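
// Illustration (hypothetical source, not part of this file): a Darwin "extern __thread"
// access such as the one below is the kind of input that reaches the descriptor-call
// lowering above. The variable's [3 x i64] descriptor address is fetched via the
// adrp/ldr pair using @TLVPPAGE/@TLVPPAGEOFF, and its first entry is called with x0
// pointing at the descriptor.
extern __thread int tls_counter;                   // hypothetical thread-local
int bump_tls_counter() { return ++tls_counter; }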
3847
3848/// When accessing thread-local variables under either the general-dynamic or
3849/// local-dynamic system, we make a "TLS-descriptor" call. The variable will
3850/// have a descriptor, accessible via a PC-relative ADRP, and whose first entry
3851/// is a function pointer to carry out the resolution.
3852///
3853/// The sequence is:
3854/// adrp x0, :tlsdesc:var
3855/// ldr x1, [x0, #:tlsdesc_lo12:var]
3856/// add x0, x0, #:tlsdesc_lo12:var
3857/// .tlsdesccall var
3858/// blr x1
3859/// (TPIDR_EL0 offset now in x0)
3860///
3861/// The above sequence must be produced unscheduled, to enable the linker to
3862/// optimize/relax this sequence.
3863/// Therefore, a pseudo-instruction (TLSDESC_CALLSEQ) is used to represent the
3864/// above sequence, and expanded really late in the compilation flow, to ensure
3865/// the sequence is produced as per above.
3866SDValue AArch64TargetLowering::LowerELFTLSDescCallSeq(SDValue SymAddr,
3867 const SDLoc &DL,
3868 SelectionDAG &DAG) const {
3869 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3870
3871 SDValue Chain = DAG.getEntryNode();
3872 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
3873
3874 Chain =
3875 DAG.getNode(AArch64ISD::TLSDESC_CALLSEQ, DL, NodeTys, {Chain, SymAddr});
3876 SDValue Glue = Chain.getValue(1);
3877
3878 return DAG.getCopyFromReg(Chain, DL, AArch64::X0, PtrVT, Glue);
3879}
3880
3881SDValue
3882AArch64TargetLowering::LowerELFGlobalTLSAddress(SDValue Op,
3883 SelectionDAG &DAG) const {
3884 assert(Subtarget->isTargetELF() && "This function expects an ELF target");
3885 assert(Subtarget->useSmallAddressing() &&
3886 "ELF TLS only supported in small memory model");
3887 // Different choices can be made for the maximum size of the TLS area for a
3888 // module. For the small address model, the default TLS size is 16MiB and the
3889 // maximum TLS size is 4GiB.
3890 // FIXME: add -mtls-size command line option and make it control the 16MiB
3891 // vs. 4GiB code sequence generation.
3892 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3893
3894 TLSModel::Model Model = getTargetMachine().getTLSModel(GA->getGlobal());
3895
3896 if (!EnableAArch64ELFLocalDynamicTLSGeneration) {
3897 if (Model == TLSModel::LocalDynamic)
3898 Model = TLSModel::GeneralDynamic;
3899 }
3900
3901 SDValue TPOff;
3902 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3903 SDLoc DL(Op);
3904 const GlobalValue *GV = GA->getGlobal();
3905
3906 SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
3907
3908 if (Model == TLSModel::LocalExec) {
3909 SDValue HiVar = DAG.getTargetGlobalAddress(
3910 GV, DL, PtrVT, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3911 SDValue LoVar = DAG.getTargetGlobalAddress(
3912 GV, DL, PtrVT, 0,
3913 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3914
3915 SDValue TPWithOff_lo =
3916 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, ThreadBase,
3917 HiVar,
3918 DAG.getTargetConstant(0, DL, MVT::i32)),
3919 0);
3920 SDValue TPWithOff =
3921 SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPWithOff_lo,
3922 LoVar,
3923 DAG.getTargetConstant(0, DL, MVT::i32)),
3924 0);
3925 return TPWithOff;
3926 } else if (Model == TLSModel::InitialExec) {
3927 TPOff = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3928 TPOff = DAG.getNode(AArch64ISD::LOADgot, DL, PtrVT, TPOff);
3929 } else if (Model == TLSModel::LocalDynamic) {
3930 // Local-dynamic accesses proceed in two phases. A general-dynamic TLS
3931 // descriptor call against the special symbol _TLS_MODULE_BASE_ to calculate
3932 // the beginning of the module's TLS region, followed by a DTPREL offset
3933 // calculation.
3934
3935 // These accesses will need deduplicating if there's more than one.
3936 AArch64FunctionInfo *MFI =
3937 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
3938 MFI->incNumLocalDynamicTLSAccesses();
3939
3940 // The call needs a relocation too for linker relaxation. It doesn't make
3941 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3942 // the address.
3943 SDValue SymAddr = DAG.getTargetExternalSymbol("_TLS_MODULE_BASE_", PtrVT,
3944 AArch64II::MO_TLS);
3945
3946 // Now we can calculate the offset from TPIDR_EL0 to this module's
3947 // thread-local area.
3948 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3949
3950 // Now use :dtprel_whatever: operations to calculate this variable's offset
3951 // in its thread-storage area.
3952 SDValue HiVar = DAG.getTargetGlobalAddress(
3953 GV, DL, MVT::i64, 0, AArch64II::MO_TLS | AArch64II::MO_HI12);
3954 SDValue LoVar = DAG.getTargetGlobalAddress(
3955 GV, DL, MVT::i64, 0,
3956 AArch64II::MO_TLS | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
3957
3958 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, HiVar,
3959 DAG.getTargetConstant(0, DL, MVT::i32)),
3960 0);
3961 TPOff = SDValue(DAG.getMachineNode(AArch64::ADDXri, DL, PtrVT, TPOff, LoVar,
3962 DAG.getTargetConstant(0, DL, MVT::i32)),
3963 0);
3964 } else if (Model == TLSModel::GeneralDynamic) {
3965 // The call needs a relocation too for linker relaxation. It doesn't make
3966 // sense to call it MO_PAGE or MO_PAGEOFF though so we need another copy of
3967 // the address.
3968 SDValue SymAddr =
3969 DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, AArch64II::MO_TLS);
3970
3971 // Finally we can make a call to calculate the offset from tpidr_el0.
3972 TPOff = LowerELFTLSDescCallSeq(SymAddr, DL, DAG);
3973 } else
3974 llvm_unreachable("Unsupported ELF TLS access model")::llvm::llvm_unreachable_internal("Unsupported ELF TLS access model"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3974)
;
3975
3976 return DAG.getNode(ISD::ADD, DL, PtrVT, ThreadBase, TPOff);
3977}
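
// Illustration (hypothetical source): which branch above is taken depends on the TLS
// model the target machine picks for the variable. A thread-local defined in the main
// executable is normally local-exec (two ADD immediates applied to the thread pointer
// read from TPIDR_EL0), a preemptible one in a shared object goes through the TLSDESC
// call sequence documented above, and initial-exec loads its offset from the GOT.
__thread long tls_slot;                            // hypothetical thread-local
long read_tls_slot() { return tls_slot; }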
3978
3979SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op,
3980 SelectionDAG &DAG) const {
3981 const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3982 if (DAG.getTarget().Options.EmulatedTLS)
3983 return LowerToTLSEmulatedModel(GA, DAG);
3984
3985 if (Subtarget->isTargetDarwin())
3986 return LowerDarwinGlobalTLSAddress(Op, DAG);
3987 if (Subtarget->isTargetELF())
3988 return LowerELFGlobalTLSAddress(Op, DAG);
3989
3990 llvm_unreachable("Unexpected platform trying to use TLS")::llvm::llvm_unreachable_internal("Unexpected platform trying to use TLS"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 3990)
;
3991}
3992
3993SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
3994 SDValue Chain = Op.getOperand(0);
3995 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
3996 SDValue LHS = Op.getOperand(2);
3997 SDValue RHS = Op.getOperand(3);
3998 SDValue Dest = Op.getOperand(4);
3999 SDLoc dl(Op);
4000
4001 // Handle f128 first, since lowering it will result in comparing the return
4002 // value of a libcall against zero, which is just what the rest of LowerBR_CC
4003 // is expecting to deal with.
4004 if (LHS.getValueType() == MVT::f128) {
4005 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4006
4007 // If softenSetCCOperands returned a scalar, we need to compare the result
4008 // against zero to select between true and false values.
4009 if (!RHS.getNode()) {
4010 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4011 CC = ISD::SETNE;
4012 }
4013 }
4014
4015 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a branch
4016 // instruction.
4017 if (isOverflowIntrOpRes(LHS) && isOneConstant(RHS) &&
4018 (CC == ISD::SETEQ || CC == ISD::SETNE)) {
4019 // Only lower legal XALUO ops.
4020 if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS->getValueType(0)))
4021 return SDValue();
4022
4023 // The actual operation with overflow check.
4024 AArch64CC::CondCode OFCC;
4025 SDValue Value, Overflow;
4026 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, LHS.getValue(0), DAG);
4027
4028 if (CC == ISD::SETNE)
4029 OFCC = getInvertedCondCode(OFCC);
4030 SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
4031
4032 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4033 Overflow);
4034 }
4035
4036 if (LHS.getValueType().isInteger()) {
4037 assert((LHS.getValueType() == RHS.getValueType()) &&
4038 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4039
4040 // If the RHS of the comparison is zero, we can potentially fold this
4041 // to a specialized branch.
4042 const ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS);
4043 if (RHSC && RHSC->getZExtValue() == 0) {
4044 if (CC == ISD::SETEQ) {
4045 // See if we can use a TBZ to fold in an AND as well.
4046 // TBZ has a smaller branch displacement than CBZ. If the offset is
4047 // out of bounds, a late MI-layer pass rewrites branches.
4048 // 403.gcc is an example that hits this case.
4049 if (LHS.getOpcode() == ISD::AND &&
4050 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4051 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4052 SDValue Test = LHS.getOperand(0);
4053 uint64_t Mask = LHS.getConstantOperandVal(1);
4054 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
4055 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4056 Dest);
4057 }
4058
4059 return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
4060 } else if (CC == ISD::SETNE) {
4061 // See if we can use a TBZ to fold in an AND as well.
4062 // TBZ has a smaller branch displacement than CBZ. If the offset is
4063 // out of bounds, a late MI-layer pass rewrites branches.
4064 // 403.gcc is an example that hits this case.
4065 if (LHS.getOpcode() == ISD::AND &&
4066 isa<ConstantSDNode>(LHS.getOperand(1)) &&
4067 isPowerOf2_64(LHS.getConstantOperandVal(1))) {
4068 SDValue Test = LHS.getOperand(0);
4069 uint64_t Mask = LHS.getConstantOperandVal(1);
4070 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
4071 DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
4072 Dest);
4073 }
4074
4075 return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
4076 } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
4077 // Don't combine AND since emitComparison converts the AND to an ANDS
4078 // (a.k.a. TST) and the test in the test bit and branch instruction
4079 // becomes redundant. This would also increase register pressure.
4080 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4081 return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
4082 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4083 }
4084 }
4085 if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
4086 LHS.getOpcode() != ISD::AND) {
4087 // Don't combine AND since emitComparison converts the AND to an ANDS
4088 // (a.k.a. TST) and the test in the test bit and branch instruction
4089 // becomes redundant. This would also increase register pressure.
4090 uint64_t Mask = LHS.getValueSizeInBits() - 1;
4091 return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
4092 DAG.getConstant(Mask, dl, MVT::i64), Dest);
4093 }
4094
4095 SDValue CCVal;
4096 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4097 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
4098 Cmp);
4099 }
4100
4101 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4102 LHS.getValueType() == MVT::f64);
4103
4104 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4105 // clean. Some of them require two branches to implement.
4106 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4107 AArch64CC::CondCode CC1, CC2;
4108 changeFPCCToAArch64CC(CC, CC1, CC2);
4109 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4110 SDValue BR1 =
4111 DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
4112 if (CC2 != AArch64CC::AL) {
4113 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4114 return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
4115 Cmp);
4116 }
4117
4118 return BR1;
4119}
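
// Illustration (hypothetical source): the AND + power-of-two fold above turns a
// single-bit test feeding a branch into one TBZ/TBNZ on bit Log2_64(Mask).
static inline int branch_on_bit4(unsigned long long x) {
  if (x & 0x10)     // candidate for "tbnz x0, #4, <taken>" instead of and + cbnz
    return 1;
  return 0;
}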
4120
4121SDValue AArch64TargetLowering::LowerFCOPYSIGN(SDValue Op,
4122 SelectionDAG &DAG) const {
4123 EVT VT = Op.getValueType();
4124 SDLoc DL(Op);
4125
4126 SDValue In1 = Op.getOperand(0);
4127 SDValue In2 = Op.getOperand(1);
4128 EVT SrcVT = In2.getValueType();
4129
4130 if (SrcVT.bitsLT(VT))
4131 In2 = DAG.getNode(ISD::FP_EXTEND, DL, VT, In2);
4132 else if (SrcVT.bitsGT(VT))
4133 In2 = DAG.getNode(ISD::FP_ROUND, DL, VT, In2, DAG.getIntPtrConstant(0, DL));
4134
4135 EVT VecVT;
4136 uint64_t EltMask;
4137 SDValue VecVal1, VecVal2;
4138
4139 auto setVecVal = [&] (int Idx) {
4140 if (!VT.isVector()) {
4141 VecVal1 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4142 DAG.getUNDEF(VecVT), In1);
4143 VecVal2 = DAG.getTargetInsertSubreg(Idx, DL, VecVT,
4144 DAG.getUNDEF(VecVT), In2);
4145 } else {
4146 VecVal1 = DAG.getNode(ISD::BITCAST, DL, VecVT, In1);
4147 VecVal2 = DAG.getNode(ISD::BITCAST, DL, VecVT, In2);
4148 }
4149 };
4150
4151 if (VT == MVT::f32 || VT == MVT::v2f32 || VT == MVT::v4f32) {
4152 VecVT = (VT == MVT::v2f32 ? MVT::v2i32 : MVT::v4i32);
4153 EltMask = 0x80000000ULL;
4154 setVecVal(AArch64::ssub);
4155 } else if (VT == MVT::f64 || VT == MVT::v2f64) {
4156 VecVT = MVT::v2i64;
4157
4158 // We want to materialize a mask with the high bit set, but the AdvSIMD
4159 // immediate moves cannot materialize that in a single instruction for
4160 // 64-bit elements. Instead, materialize zero and then negate it.
4161 EltMask = 0;
4162
4163 setVecVal(AArch64::dsub);
4164 } else if (VT == MVT::f16 || VT == MVT::v4f16 || VT == MVT::v8f16) {
4165 VecVT = (VT == MVT::v4f16 ? MVT::v4i16 : MVT::v8i16);
4166 EltMask = 0x8000ULL;
4167 setVecVal(AArch64::hsub);
4168 } else {
4169 llvm_unreachable("Invalid type for copysign!")::llvm::llvm_unreachable_internal("Invalid type for copysign!"
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4169)
;
4170 }
4171
4172 SDValue BuildVec = DAG.getConstant(EltMask, DL, VecVT);
4173
4174 // If we couldn't materialize the mask above, then the mask vector will be
4175 // the zero vector, and we need to negate it here.
4176 if (VT == MVT::f64 || VT == MVT::v2f64) {
4177 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, BuildVec);
4178 BuildVec = DAG.getNode(ISD::FNEG, DL, MVT::v2f64, BuildVec);
4179 BuildVec = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, BuildVec);
4180 }
4181
4182 SDValue Sel =
4183 DAG.getNode(AArch64ISD::BIT, DL, VecVT, VecVal1, VecVal2, BuildVec);
4184
4185 if (VT == MVT::f16)
4186 return DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Sel);
4187 if (VT == MVT::f32)
4188 return DAG.getTargetExtractSubreg(AArch64::ssub, DL, VT, Sel);
4189 else if (VT == MVT::f64)
4190 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, VT, Sel);
4191 else
4192 return DAG.getNode(ISD::BITCAST, DL, VT, Sel);
4193}
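
// Scalar sketch (an illustration, not the vector BIT node built above): copysign keeps
// the magnitude bits of In1 and inserts only the sign bit of In2. The 0x80000000 and
// 0x8000 element masks above select exactly that bit; for f64 the mask is produced by
// negating +0.0, which yields 0x8000000000000000 per lane.
#include <cstdint>
#include <cstring>
static inline double copysign_f64_sketch(double mag, double sgn) {
  std::uint64_t m, s;
  std::memcpy(&m, &mag, sizeof m);
  std::memcpy(&s, &sgn, sizeof s);
  std::uint64_t bits = (m & ~(1ULL << 63)) | (s & (1ULL << 63));
  double out;
  std::memcpy(&out, &bits, sizeof out);
  return out;
}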
4194
4195SDValue AArch64TargetLowering::LowerCTPOP(SDValue Op, SelectionDAG &DAG) const {
4196 if (DAG.getMachineFunction().getFunction().hasFnAttribute(
4197 Attribute::NoImplicitFloat))
4198 return SDValue();
4199
4200 if (!Subtarget->hasNEON())
4201 return SDValue();
4202
4203 // While there is no scalar integer popcount instruction, CTPOP can
4204 // be lowered more efficiently to the following sequence, which uses
4205 // AdvSIMD registers/instructions as long as the copies to/from
4206 // the AdvSIMD registers are cheap.
4207 // FMOV D0, X0 // copy 64-bit int to vector, high bits zero'd
4208 // CNT V0.8B, V0.8B // 8xbyte pop-counts
4209 // ADDV B0, V0.8B // sum 8xbyte pop-counts
4210 // UMOV X0, V0.B[0] // copy byte result back to integer reg
4211 SDValue Val = Op.getOperand(0);
4212 SDLoc DL(Op);
4213 EVT VT = Op.getValueType();
4214
4215 if (VT == MVT::i32)
4216 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
4217 Val = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Val);
4218
4219 SDValue CtPop = DAG.getNode(ISD::CTPOP, DL, MVT::v8i8, Val);
4220 SDValue UaddLV = DAG.getNode(
4221 ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
4222 DAG.getConstant(Intrinsic::aarch64_neon_uaddlv, DL, MVT::i32), CtPop);
4223
4224 if (VT == MVT::i64)
4225 UaddLV = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, UaddLV);
4226 return UaddLV;
4227}
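
// Scalar sketch of the CNT + ADDV idea used above (illustration only): count the set
// bits in each byte, then sum the eight per-byte counts; the total equals the 64-bit
// popcount that UADDLV computes across the v8i8 vector.
#include <cstdint>
static inline unsigned popcount64_by_bytes(std::uint64_t x) {
  unsigned sum = 0;
  for (int i = 0; i < 8; ++i) {
    unsigned byte = unsigned((x >> (8 * i)) & 0xff);
    unsigned cnt = 0;
    for (; byte; byte >>= 1)  // per-byte popcount: the scalar analogue of CNT V0.8B
      cnt += byte & 1;
    sum += cnt;               // accumulate: the scalar analogue of ADDV / UADDLV
  }
  return sum;
}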
4228
4229SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
4230
4231 if (Op.getValueType().isVector())
4232 return LowerVSETCC(Op, DAG);
4233
4234 SDValue LHS = Op.getOperand(0);
4235 SDValue RHS = Op.getOperand(1);
4236 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4237 SDLoc dl(Op);
4238
4239 // We chose ZeroOrOneBooleanContents, so use zero and one.
4240 EVT VT = Op.getValueType();
4241 SDValue TVal = DAG.getConstant(1, dl, VT);
4242 SDValue FVal = DAG.getConstant(0, dl, VT);
4243
4244 // Handle f128 first, since one possible outcome is a normal integer
4245 // comparison which gets picked up by the next if statement.
4246 if (LHS.getValueType() == MVT::f128) {
4247 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4248
4249 // If softenSetCCOperands returned a scalar, use it.
4250 if (!RHS.getNode()) {
4251 assert(LHS.getValueType() == Op.getValueType() &&
4252 "Unexpected setcc expansion!");
4253 return LHS;
4254 }
4255 }
4256
4257 if (LHS.getValueType().isInteger()) {
4258 SDValue CCVal;
4259 SDValue Cmp =
4260 getAArch64Cmp(LHS, RHS, ISD::getSetCCInverse(CC, true), CCVal, DAG, dl);
4261
4262 // Note that we inverted the condition above, so we reverse the order of
4263 // the true and false operands here. This will allow the setcc to be
4264 // matched to a single CSINC instruction.
4265 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
4266 }
4267
4268 // Now we know we're dealing with FP values.
4269 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4270 LHS.getValueType() == MVT::f64);
4271
4272 // If that fails, we'll need to perform an FCMP + CSEL sequence. Go ahead
4273 // and do the comparison.
4274 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4275
4276 AArch64CC::CondCode CC1, CC2;
4277 changeFPCCToAArch64CC(CC, CC1, CC2);
4278 if (CC2 == AArch64CC::AL) {
4279 changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, false), CC1, CC2);
4280 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4281
4282 // Note that we inverted the condition above, so we reverse the order of
4283 // the true and false operands here. This will allow the setcc to be
4284 // matched to a single CSINC instruction.
4285 return DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
4286 } else {
4287 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
4288 // totally clean. Some of them require two CSELs to implement. As is in
4289 // this case, we emit the first CSEL and then emit a second using the output
4290 // of the first as the RHS. We're effectively OR'ing the two CC's together.
4291
4292 // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
4293 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4294 SDValue CS1 =
4295 DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4296
4297 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4298 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4299 }
4300}
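
// Illustration (hypothetical source): an integer setcc such as
static inline bool ge_i32(int a, int b) {
  return a >= b;   // expected to become "cmp w0, w1; cset w0, ge"
}
// is matched as CMP + CSET, and CSET is the CSINC alias with both sources WZR, which is
// why the condition is inverted and the true/false operands are swapped above.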
4301
4302SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
4303 SDValue RHS, SDValue TVal,
4304 SDValue FVal, const SDLoc &dl,
4305 SelectionDAG &DAG) const {
4306 // Handle f128 first, because it will result in a comparison of some RTLIB
4307 // call result against zero.
4308 if (LHS.getValueType() == MVT::f128) {
4309 softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl);
4310
4311 // If softenSetCCOperands returned a scalar, we need to compare the result
4312 // against zero to select between true and false values.
4313 if (!RHS.getNode()) {
4314 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4315 CC = ISD::SETNE;
4316 }
4317 }
4318
4319 // Also handle f16, for which we need to do a f32 comparison.
4320 if (LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) {
4321 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
4322 RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
4323 }
4324
4325 // Next, handle integers.
4326 if (LHS.getValueType().isInteger()) {
4327 assert((LHS.getValueType() == RHS.getValueType()) &&
4328 (LHS.getValueType() == MVT::i32 || LHS.getValueType() == MVT::i64));
4329
4330 unsigned Opcode = AArch64ISD::CSEL;
4331
4332 // If both the TVal and the FVal are constants, see if we can swap them in
4333 // order to form a CSINV or CSINC out of them.
4334 ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(FVal);
4335 ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(TVal);
4336
4337 if (CTVal && CFVal && CTVal->isAllOnesValue() && CFVal->isNullValue()) {
4338 std::swap(TVal, FVal);
4339 std::swap(CTVal, CFVal);
4340 CC = ISD::getSetCCInverse(CC, true);
4341 } else if (CTVal && CFVal && CTVal->isOne() && CFVal->isNullValue()) {
4342 std::swap(TVal, FVal);
4343 std::swap(CTVal, CFVal);
4344 CC = ISD::getSetCCInverse(CC, true);
4345 } else if (TVal.getOpcode() == ISD::XOR) {
4346 // If TVal is a NOT we want to swap TVal and FVal so that we can match
4347 // with a CSINV rather than a CSEL.
4348 if (isAllOnesConstant(TVal.getOperand(1))) {
4349 std::swap(TVal, FVal);
4350 std::swap(CTVal, CFVal);
4351 CC = ISD::getSetCCInverse(CC, true);
4352 }
4353 } else if (TVal.getOpcode() == ISD::SUB) {
4354 // If TVal is a negation (SUB from 0) we want to swap TVal and FVal so
4355 // that we can match with a CSNEG rather than a CSEL.
4356 if (isNullConstant(TVal.getOperand(0))) {
4357 std::swap(TVal, FVal);
4358 std::swap(CTVal, CFVal);
4359 CC = ISD::getSetCCInverse(CC, true);
4360 }
4361 } else if (CTVal && CFVal) {
4362 const int64_t TrueVal = CTVal->getSExtValue();
4363 const int64_t FalseVal = CFVal->getSExtValue();
4364 bool Swap = false;
4365
4366 // If both TVal and FVal are constants, see if FVal is the
4367 // inverse/negation/increment of TVal and generate a CSINV/CSNEG/CSINC
4368 // instead of a CSEL in that case.
4369 if (TrueVal == ~FalseVal) {
4370 Opcode = AArch64ISD::CSINV;
4371 } else if (TrueVal == -FalseVal) {
4372 Opcode = AArch64ISD::CSNEG;
4373 } else if (TVal.getValueType() == MVT::i32) {
4374 // If our operands are only 32-bit wide, make sure we use 32-bit
4375 // arithmetic for the check whether we can use CSINC. This ensures that
4376 // the addition in the check will wrap around properly in case there is
4377 // an overflow (which would not be the case if we do the check with
4378 // 64-bit arithmetic).
4379 const uint32_t TrueVal32 = CTVal->getZExtValue();
4380 const uint32_t FalseVal32 = CFVal->getZExtValue();
4381
4382 if ((TrueVal32 == FalseVal32 + 1) || (TrueVal32 + 1 == FalseVal32)) {
4383 Opcode = AArch64ISD::CSINC;
4384
4385 if (TrueVal32 > FalseVal32) {
4386 Swap = true;
4387 }
4388 }
4389 // 64-bit check whether we can use CSINC.
4390 } else if ((TrueVal == FalseVal + 1) || (TrueVal + 1 == FalseVal)) {
4391 Opcode = AArch64ISD::CSINC;
4392
4393 if (TrueVal > FalseVal) {
4394 Swap = true;
4395 }
4396 }
4397
4398 // Swap TVal and FVal if necessary.
4399 if (Swap) {
4400 std::swap(TVal, FVal);
4401 std::swap(CTVal, CFVal);
4402 CC = ISD::getSetCCInverse(CC, true);
4403 }
4404
4405 if (Opcode != AArch64ISD::CSEL) {
4406 // Drop FVal since we can get its value by simply inverting/negating
4407 // TVal.
4408 FVal = TVal;
4409 }
4410 }
4411
4412 // Avoid materializing a constant when possible by reusing a known value in
4413 // a register. However, don't perform this optimization if the known value
4414 // is one, zero or negative one in the case of a CSEL. We can always
4415 // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
4416 // FVal, respectively.
4417 ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
4418 if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
4419 !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
4420 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4421 // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
4422 // "a != C ? x : a" to avoid materializing C.
4423 if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
4424 TVal = LHS;
4425 else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
4426 FVal = LHS;
4427 } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
4428 assert (CTVal && CFVal && "Expected constant operands for CSNEG.")(static_cast <bool> (CTVal && CFVal && "Expected constant operands for CSNEG."
) ? void (0) : __assert_fail ("CTVal && CFVal && \"Expected constant operands for CSNEG.\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4428, __extension__ __PRETTY_FUNCTION__))
;
4429 // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
4430 // avoid materializing C.
4431 AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
4432 if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
4433 Opcode = AArch64ISD::CSINV;
4434 TVal = LHS;
4435 FVal = DAG.getConstant(0, dl, FVal.getValueType());
4436 }
4437 }
4438
4439 SDValue CCVal;
4440 SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
4441 EVT VT = TVal.getValueType();
4442 return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
4443 }
4444
4445 // Now we know we're dealing with FP values.
4446 assert(LHS.getValueType() == MVT::f16 || LHS.getValueType() == MVT::f32 ||
4447 LHS.getValueType() == MVT::f64);
4448 assert(LHS.getValueType() == RHS.getValueType());
4449 EVT VT = TVal.getValueType();
4450 SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
4451
4452 // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
4453 // clean. Some of them require two CSELs to implement.
4454 AArch64CC::CondCode CC1, CC2;
4455 changeFPCCToAArch64CC(CC, CC1, CC2);
4456
4457 if (DAG.getTarget().Options.UnsafeFPMath) {
4458 // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
4459 // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
4460 ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
4461 if (RHSVal && RHSVal->isZero()) {
4462 ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
4463 ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
4464
4465 if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
4466 CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
4467 TVal = LHS;
4468 else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
4469 CFVal && CFVal->isZero() &&
4470 FVal.getValueType() == LHS.getValueType())
4471 FVal = LHS;
4472 }
4473 }
4474
4475 // Emit first, and possibly only, CSEL.
4476 SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
4477 SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
4478
4479 // If we need a second CSEL, emit it, using the output of the first as the
4480 // RHS. We're effectively OR'ing the two CC's together.
4481 if (CC2 != AArch64CC::AL) {
4482 SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
4483 return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
4484 }
4485
4486 // Otherwise, return the output of the first CSEL.
4487 return CS1;
4488}
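
// Minimal C++ model (assumed standard AArch64 semantics, not code from this file) of
// the conditional-select family targeted above. It shows why constants related by
// bitwise-NOT, negation or +1 let a CSEL with a materialized constant be replaced by
// CSINV, CSNEG or CSINC that reuses the other operand.
#include <cstdint>
static inline std::uint64_t csel (bool c, std::uint64_t a, std::uint64_t b) { return c ? a : b; }
static inline std::uint64_t csinc(bool c, std::uint64_t a, std::uint64_t b) { return c ? a : b + 1; }
static inline std::uint64_t csinv(bool c, std::uint64_t a, std::uint64_t b) { return c ? a : ~b; }
static inline std::uint64_t csneg(bool c, std::uint64_t a, std::uint64_t b) { return c ? a : 0 - b; }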
4489
4490SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
4491 SelectionDAG &DAG) const {
4492 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4493 SDValue LHS = Op.getOperand(0);
4494 SDValue RHS = Op.getOperand(1);
4495 SDValue TVal = Op.getOperand(2);
4496 SDValue FVal = Op.getOperand(3);
4497 SDLoc DL(Op);
4498 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4499}
4500
4501SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
4502 SelectionDAG &DAG) const {
4503 SDValue CCVal = Op->getOperand(0);
4504 SDValue TVal = Op->getOperand(1);
4505 SDValue FVal = Op->getOperand(2);
4506 SDLoc DL(Op);
4507
4508 // Optimize {s|u}{add|sub|mul}.with.overflow feeding into a select
4509 // instruction.
4510 if (isOverflowIntrOpRes(CCVal)) {
4511 // Only lower legal XALUO ops.
4512 if (!DAG.getTargetLoweringInfo().isTypeLegal(CCVal->getValueType(0)))
4513 return SDValue();
4514
4515 AArch64CC::CondCode OFCC;
4516 SDValue Value, Overflow;
4517 std::tie(Value, Overflow) = getAArch64XALUOOp(OFCC, CCVal.getValue(0), DAG);
4518 SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
4519
4520 return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
4521 CCVal, Overflow);
4522 }
4523
4524 // Lower it the same way as we would lower a SELECT_CC node.
4525 ISD::CondCode CC;
4526 SDValue LHS, RHS;
4527 if (CCVal.getOpcode() == ISD::SETCC) {
4528 LHS = CCVal.getOperand(0);
4529 RHS = CCVal.getOperand(1);
4530 CC = cast<CondCodeSDNode>(CCVal->getOperand(2))->get();
4531 } else {
4532 LHS = CCVal;
4533 RHS = DAG.getConstant(0, DL, CCVal.getValueType());
4534 CC = ISD::SETNE;
4535 }
4536 return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
4537}
4538
4539SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op,
4540 SelectionDAG &DAG) const {
4541 // Jump table entries are emitted as PC-relative offsets. No additional tweaking
4542 // is necessary here; just get the address of the jump table.
4543 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
4544
4545 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4546 !Subtarget->isTargetMachO()) {
4547 return getAddrLarge(JT, DAG);
4548 }
4549 return getAddr(JT, DAG);
4550}
4551
4552SDValue AArch64TargetLowering::LowerConstantPool(SDValue Op,
4553 SelectionDAG &DAG) const {
4554 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
4555
4556 if (getTargetMachine().getCodeModel() == CodeModel::Large) {
4557 // Use the GOT for the large code model on iOS.
4558 if (Subtarget->isTargetMachO()) {
4559 return getGOT(CP, DAG);
4560 }
4561 return getAddrLarge(CP, DAG);
4562 } else {
4563 return getAddr(CP, DAG);
4564 }
4565}
4566
4567SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op,
4568 SelectionDAG &DAG) const {
4569 BlockAddressSDNode *BA = cast<BlockAddressSDNode>(Op);
4570 if (getTargetMachine().getCodeModel() == CodeModel::Large &&
4571 !Subtarget->isTargetMachO()) {
4572 return getAddrLarge(BA, DAG);
4573 } else {
4574 return getAddr(BA, DAG);
4575 }
4576}
4577
4578SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
4579 SelectionDAG &DAG) const {
4580 AArch64FunctionInfo *FuncInfo =
4581 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4582
4583 SDLoc DL(Op);
4584 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(),
4585 getPointerTy(DAG.getDataLayout()));
4586 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4587 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4588 MachinePointerInfo(SV));
4589}
4590
4591SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
4592 SelectionDAG &DAG) const {
4593 AArch64FunctionInfo *FuncInfo =
4594 DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
4595
4596 SDLoc DL(Op);
4597 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
4598 ? FuncInfo->getVarArgsGPRIndex()
4599 : FuncInfo->getVarArgsStackIndex(),
4600 getPointerTy(DAG.getDataLayout()));
4601 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4602 return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
4603 MachinePointerInfo(SV));
4604}
4605
4606SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
4607 SelectionDAG &DAG) const {
4608 // The layout of the va_list struct is specified in the AArch64 Procedure Call
4609 // Standard, section B.3.
4610 MachineFunction &MF = DAG.getMachineFunction();
4611 AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
4612 auto PtrVT = getPointerTy(DAG.getDataLayout());
4613 SDLoc DL(Op);
4614
4615 SDValue Chain = Op.getOperand(0);
4616 SDValue VAList = Op.getOperand(1);
4617 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4618 SmallVector<SDValue, 4> MemOps;
4619
4620 // void *__stack at offset 0
4621 SDValue Stack = DAG.getFrameIndex(FuncInfo->getVarArgsStackIndex(), PtrVT);
4622 MemOps.push_back(DAG.getStore(Chain, DL, Stack, VAList,
4623 MachinePointerInfo(SV), /* Alignment = */ 8));
4624
4625 // void *__gr_top at offset 8
4626 int GPRSize = FuncInfo->getVarArgsGPRSize();
4627 if (GPRSize > 0) {
4628 SDValue GRTop, GRTopAddr;
4629
4630 GRTopAddr =
4631 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(8, DL, PtrVT));
4632
4633 GRTop = DAG.getFrameIndex(FuncInfo->getVarArgsGPRIndex(), PtrVT);
4634 GRTop = DAG.getNode(ISD::ADD, DL, PtrVT, GRTop,
4635 DAG.getConstant(GPRSize, DL, PtrVT));
4636
4637 MemOps.push_back(DAG.getStore(Chain, DL, GRTop, GRTopAddr,
4638 MachinePointerInfo(SV, 8),
4639 /* Alignment = */ 8));
4640 }
4641
4642 // void *__vr_top at offset 16
4643 int FPRSize = FuncInfo->getVarArgsFPRSize();
4644 if (FPRSize > 0) {
4645 SDValue VRTop, VRTopAddr;
4646 VRTopAddr = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4647 DAG.getConstant(16, DL, PtrVT));
4648
4649 VRTop = DAG.getFrameIndex(FuncInfo->getVarArgsFPRIndex(), PtrVT);
4650 VRTop = DAG.getNode(ISD::ADD, DL, PtrVT, VRTop,
4651 DAG.getConstant(FPRSize, DL, PtrVT));
4652
4653 MemOps.push_back(DAG.getStore(Chain, DL, VRTop, VRTopAddr,
4654 MachinePointerInfo(SV, 16),
4655 /* Alignment = */ 8));
4656 }
4657
4658 // int __gr_offs at offset 24
4659 SDValue GROffsAddr =
4660 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(24, DL, PtrVT));
4661 MemOps.push_back(DAG.getStore(
4662 Chain, DL, DAG.getConstant(-GPRSize, DL, MVT::i32), GROffsAddr,
4663 MachinePointerInfo(SV, 24), /* Alignment = */ 4));
4664
4665 // int __vr_offs at offset 28
4666 SDValue VROffsAddr =
4667 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getConstant(28, DL, PtrVT));
4668 MemOps.push_back(DAG.getStore(
4669 Chain, DL, DAG.getConstant(-FPRSize, DL, MVT::i32), VROffsAddr,
4670 MachinePointerInfo(SV, 28), /* Alignment = */ 4));
4671
4672 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
4673}
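
// Layout sketch of the structure the stores above populate (field names follow the
// AAPCS and the comments above; this struct is an illustration, not a type from this
// file). The 0/8/16/24/28 constants are the field offsets.
#include <cstddef>
struct AAPCS64VaListSketch {
  void *stack;   // __stack:   next stacked argument
  void *gr_top;  // __gr_top:  end of the saved general-purpose register area
  void *vr_top;  // __vr_top:  end of the saved FP/SIMD register area
  int gr_offs;   // __gr_offs: negative byte offset from gr_top to the next GPR slot
  int vr_offs;   // __vr_offs: negative byte offset from vr_top to the next FPR slot
};
static_assert(offsetof(AAPCS64VaListSketch, vr_offs) == 28,
              "matches the offset used by the __vr_offs store above (on LP64)");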
4674
4675SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
4676 SelectionDAG &DAG) const {
4677 MachineFunction &MF = DAG.getMachineFunction();
4678
4679 if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
4680 return LowerWin64_VASTART(Op, DAG);
4681 else if (Subtarget->isTargetDarwin())
4682 return LowerDarwin_VASTART(Op, DAG);
4683 else
4684 return LowerAAPCS_VASTART(Op, DAG);
4685}
4686
4687SDValue AArch64TargetLowering::LowerVACOPY(SDValue Op,
4688 SelectionDAG &DAG) const {
4689 // AAPCS has three pointers and two ints (= 32 bytes); Darwin and Windows use a
4690 // single pointer.
4691 SDLoc DL(Op);
4692 unsigned VaListSize =
4693 Subtarget->isTargetDarwin() || Subtarget->isTargetWindows() ? 8 : 32;
4694 const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
4695 const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
4696
4697 return DAG.getMemcpy(Op.getOperand(0), DL, Op.getOperand(1),
4698 Op.getOperand(2),
4699 DAG.getConstant(VaListSize, DL, MVT::i32),
4700 8, false, false, false, MachinePointerInfo(DestSV),
4701 MachinePointerInfo(SrcSV));
4702}
4703
4704SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
4705 assert(Subtarget->isTargetDarwin() &&
4706 "automatic va_arg instruction only works on Darwin");
4707
4708 const Value *V = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
4709 EVT VT = Op.getValueType();
4710 SDLoc DL(Op);
4711 SDValue Chain = Op.getOperand(0);
4712 SDValue Addr = Op.getOperand(1);
4713 unsigned Align = Op.getConstantOperandVal(3);
4714 auto PtrVT = getPointerTy(DAG.getDataLayout());
4715
4716 SDValue VAList = DAG.getLoad(PtrVT, DL, Chain, Addr, MachinePointerInfo(V));
4717 Chain = VAList.getValue(1);
4718
4719 if (Align > 8) {
4720 assert(((Align & (Align - 1)) == 0) && "Expected Align to be a power of 2")(static_cast <bool> (((Align & (Align - 1)) == 0) &&
"Expected Align to be a power of 2") ? void (0) : __assert_fail
("((Align & (Align - 1)) == 0) && \"Expected Align to be a power of 2\""
, "/build/llvm-toolchain-snapshot-7~svn325874/lib/Target/AArch64/AArch64ISelLowering.cpp"
, 4720, __extension__ __PRETTY_FUNCTION__))
;
4721 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4722 DAG.getConstant(Align - 1, DL, PtrVT));
4723 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
4724 DAG.getConstant(-(int64_t)Align, DL, PtrVT));
4725 }
4726
4727 Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
4728 uint64_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
4729
4730 // Scalar integer and FP values smaller than 64 bits are implicitly extended
4731 // up to 64 bits. At the very least, we have to increase the striding of the
4732 // vaargs list to match this, and for FP values we need to introduce
4733 // FP_ROUND nodes as well.
4734 if (VT.isInteger() && !VT.isVector())
4735 ArgSize = 8;
4736 bool NeedFPTrunc = false;
4737 if (VT.isFloatingPoint() && !VT.isVector() && VT != MVT::f64) {
4738 ArgSize = 8;
4739 NeedFPTrunc = true;
4740 }
4741
4742 // Increment the pointer, VAList, to the next vaarg
4743 SDValue VANext = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
4744 DAG.getConstant(ArgSize, DL, PtrVT));
4745 // Store the incremented VAList to the legalized pointer
4746 SDValue APStore =
4747 DAG.getStore(Chain, DL, VANext, Addr, MachinePointerInfo(V));
4748
4749 // Load the actual argument out of the pointer VAList
4750 if (NeedFPTrunc) {
4751 // Load the value as an f64.
4752 SDValue WideFP =
4753 DAG.getLoad(MVT::f64, DL, APStore, VAList, MachinePointerInfo());
4754 // Round the value down to an f32.
4755 SDValue NarrowFP = DAG.getNode(ISD::FP_ROUND, DL, VT, WideFP.getValue(0),
4756 DAG.getIntPtrConstant(1, DL));
4757 SDValue Ops[] = { NarrowFP, WideFP.getValue(1) };
4758 // Merge the rounded value with the chain output of the load.
4759 return DAG.getMergeValues(Ops, DL);
4760 }
4761
4762 return DAG.getLoad(VT, DL, APStore, VAList, MachinePointerInfo());
4763}
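
// Illustrative helper (assumes Align is a power of two, as asserted above): the
// "add Align-1 then mask" adjustment applied to VAList rounds the pointer up, and
// AND-ing with -(int64_t)Align is the same as AND-ing with ~(Align - 1).
#include <cstdint>
static inline std::uint64_t align_up(std::uint64_t p, std::uint64_t align) {
  return (p + align - 1) & ~(align - 1);
}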
4764
4765SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
4766 SelectionDAG &DAG) const {
4767 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4768 MFI.setFrameAddressIsTaken(true);
4769
4770 EVT VT = Op.getValueType();
4771 SDLoc DL(Op);
4772 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4773 SDValue FrameAddr =
4774 DAG.getCopyFromReg(DAG.getEntryNode(), DL, AArch64::FP, VT);
4775 while (Depth--)
4776 FrameAddr = DAG.getLoad(VT, DL, DAG.getEntryNode(), FrameAddr,
4777 MachinePointerInfo());
4778 return FrameAddr;
4779}
4780
4781// FIXME? Maybe this could be a TableGen attribute on some registers and
4782// this table could be generated automatically from RegInfo.
4783unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
4784 SelectionDAG &DAG) const {
4785 unsigned Reg = StringSwitch<unsigned>(RegName)
4786 .Case("sp", AArch64::SP)
4787 .Case("x18", AArch64::X18)
4788 .Case("w18", AArch64::W18)
4789 .Default(0);
4790 if ((Reg == AArch64::X18 || Reg == AArch64::W18) &&
4791 !Subtarget->isX18Reserved())
4792 Reg = 0;
4793 if (Reg)
4794 return Reg;
4795 report_fatal_error(Twine("Invalid register name \""
4796 + StringRef(RegName) + "\"."));
4797}
4798
4799SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
4800 SelectionDAG &DAG) const {
4801 MachineFunction &MF = DAG.getMachineFunction();
4802 MachineFrameInfo &MFI = MF.getFrameInfo();
4803 MFI.setReturnAddressIsTaken(true);
4804
4805 EVT VT = Op.getValueType();
4806 SDLoc DL(Op);
4807 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4808 if (Depth) {
4809 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4810 SDValue Offset = DAG.getConstant(8, DL, getPointerTy(DAG.getDataLayout()));
4811 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
4812 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
4813 MachinePointerInfo());
4814 }
4815
4816 // Return LR, which contains the return address. Mark it an implicit live-in.
4817 unsigned Reg = MF.addLiveIn(AArch64::LR, &AArch64::GPR64RegClass);
4818 return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
4819}
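
// Illustration (hypothetical source): these two lowerings back the GCC-style builtins.
// Depth 0 for the return address simply copies LR; non-zero depths walk the saved
// frame-pointer chain and read the saved LR at offset 8 from the frame address.
void *calling_pc()    { return __builtin_return_address(0); }
void *current_frame() { return __builtin_frame_address(0); }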
4820
4821/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4822/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
4823SDValue AArch64TargetLowering::LowerShiftRightParts(SDValue Op,
4824 SelectionDAG &DAG) const {
4825 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4826 EVT VT = Op.getValueType();
4827 unsigned VTBits = VT.getSizeInBits();
4828 SDLoc dl(Op);
4829 SDValue ShOpLo = Op.getOperand(0);
4830 SDValue ShOpHi = Op.getOperand(1);
4831 SDValue ShAmt = Op.getOperand(2);
4832 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4833
4834 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4835
4836 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4837 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4838 SDValue HiBitsForLo = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4839
4840 // Unfortunately, if ShAmt == 0, we just calculated "(SHL ShOpHi, 64)" which
4841 // is "undef". We wanted 0, so CSEL it directly.
4842 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4843 ISD::SETEQ, dl, DAG);
4844 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4845 HiBitsForLo =
4846 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4847 HiBitsForLo, CCVal, Cmp);
4848
4849 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4850 DAG.getConstant(VTBits, dl, MVT::i64));
4851
4852 SDValue LoBitsForLo = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4853 SDValue LoForNormalShift =
4854 DAG.getNode(ISD::OR, dl, VT, LoBitsForLo, HiBitsForLo);
4855
4856 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4857 dl, DAG);
4858 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4859 SDValue LoForBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4860 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4861 LoForNormalShift, CCVal, Cmp);
4862
4863 // AArch64 shifts larger than the register width are wrapped rather than
4864 // clamped, so we can't just emit "hi >> x".
4865 SDValue HiForNormalShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4866 SDValue HiForBigShift =
4867 Opc == ISD::SRA
4868 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4869 DAG.getConstant(VTBits - 1, dl, MVT::i64))
4870 : DAG.getConstant(0, dl, VT);
4871 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4872 HiForNormalShift, CCVal, Cmp);
4873
4874 SDValue Ops[2] = { Lo, Hi };
4875 return DAG.getMergeValues(Ops, dl);
4876}
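
As an editorial aid (not from the file), the scalar computation that the CSEL-based lowering above encodes for SRL_PARTS can be written with explicit branches. The selects exist because the DAG cannot branch and because a shift by 64 or more on an i64 is undefined, exactly the hazard the "(SHL ShOpHi, 64)" comment guards against; SRA_PARTS differs only in sign-filling the high half for amounts of 64 and above.

#include <cstdint>

// Hypothetical helper, for illustration only: a 128-bit logical shift right
// split across two 64-bit halves, assuming 0 <= Amt < 128.
static void srlParts(uint64_t Lo, uint64_t Hi, unsigned Amt,
                     uint64_t &OutLo, uint64_t &OutHi) {
  if (Amt == 0) {                     // guarded by the first CSEL above
    OutLo = Lo;
    OutHi = Hi;
  } else if (Amt < 64) {              // the "normal shift" CSEL arms
    OutLo = (Lo >> Amt) | (Hi << (64 - Amt));
    OutHi = Hi >> Amt;
  } else {                            // the "big shift" CSEL arms (Amt >= 64)
    OutLo = Hi >> (Amt - 64);
    OutHi = 0;
  }
}
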
4877
4878/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4879/// i64 values and takes a 2 x i64 value to shift plus a shift amount.
4880SDValue AArch64TargetLowering::LowerShiftLeftParts(SDValue Op,
4881 SelectionDAG &DAG) const {
4882 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4883 EVT VT = Op.getValueType();
4884 unsigned VTBits = VT.getSizeInBits();
4885 SDLoc dl(Op);
4886 SDValue ShOpLo = Op.getOperand(0);
4887 SDValue ShOpHi = Op.getOperand(1);
4888 SDValue ShAmt = Op.getOperand(2);
4889
4890 assert(Op.getOpcode() == ISD::SHL_PARTS);
4891 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64,
4892 DAG.getConstant(VTBits, dl, MVT::i64), ShAmt);
4893 SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4894
4895 // Unfortunately, if ShAmt == 0, we just calculated "(SRL ShOpLo, 64)" which
4896 // is "undef". We wanted 0, so CSEL it directly.
4897 SDValue Cmp = emitComparison(ShAmt, DAG.getConstant(0, dl, MVT::i64),
4898 ISD::SETEQ, dl, DAG);
4899 SDValue CCVal = DAG.getConstant(AArch64CC::EQ, dl, MVT::i32);
4900 LoBitsForHi =
4901 DAG.getNode(AArch64ISD::CSEL, dl, VT, DAG.getConstant(0, dl, MVT::i64),
4902 LoBitsForHi, CCVal, Cmp);
4903
4904 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i64, ShAmt,
4905 DAG.getConstant(VTBits, dl, MVT::i64));
4906 SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4907 SDValue HiForNormalShift =
4908 DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
4909
4910 SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4911
4912 Cmp = emitComparison(ExtraShAmt, DAG.getConstant(0, dl, MVT::i64), ISD::SETGE,
4913 dl, DAG);
4914 CCVal = DAG.getConstant(AArch64CC::GE, dl, MVT::i32);
4915 SDValue Hi = DAG.getNode(AArch64ISD::CSEL, dl, VT, HiForBigShift,
4916 HiForNormalShift, CCVal, Cmp);
4917
4918 // AArch64 shifts larger than the register width are wrapped rather than
4919 // clamped, so we can't just emit "lo << a" if a is too big.
4920 SDValue LoForBigShift = DAG.getConstant(0, dl, VT);
4921 SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4922 SDValue Lo = DAG.getNode(AArch64ISD::CSEL, dl, VT, LoForBigShift,
4923 LoForNormalShift, CCVal, Cmp);
4924
4925 SDValue Ops[2] = { Lo, Hi };
4926 return DAG.getMergeValues(Ops, dl);
4927}
4928
4929bool AArch64TargetLowering::isOffsetFoldingLegal(
4930 const GlobalAddressSDNode *GA) const {
4931 DEBUG(dbgs() << "Skipping offset folding global address: ");
4932 DEBUG(GA->dump());
4933 DEBUG(dbgs() << "AArch64 doesn't support folding offsets into global "
4934 "addresses\n");
4935 return false;
4936}
4937
4938bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
4939 // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases.
4940 // FIXME: We should be able to handle f128 as well with a clever lowering.
4941 if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 ||
4942 (VT == MVT::f16 && Subtarget->hasFullFP16()))) {
4943 DEBUG(dbgs() << "Legal fp imm: materialize 0 using the zero register\n");
4944 return true;
4945 }
4946
4947 StringRef FPType;
4948 bool IsLegal = false;
4949 SmallString<128> ImmStrVal;
4950 Imm.toString(ImmStrVal);
4951
4952 if (VT == MVT::f64) {
4953 FPType = "f64";
4954 IsLegal = AArch64_AM::getFP64Imm(Imm) != -1;
4955 } else if (VT == MVT::f32) {
4956 FPType = "f32";
4957 IsLegal = AArch64_AM::getFP32Imm(Imm) != -1;
4958 } else if (VT == MVT::f16 && Subtarget->hasFullFP16()) {
4959 FPType = "f16";
4960 IsLegal = AArch64_AM::getFP16Imm(Imm) != -1;
4961 }
4962
4963 if (IsLegal) {
4964 DEBUG(dbgs() << "Legal " << FPType << " imm value: " << ImmStrVal << "\n");
4965 return true;
4966 }
4967
4968 if (!FPType.empty())
4969 DEBUG(dbgs() << "Illegal " << FPType << " imm value: " << ImmStrVal << "\n");
4970 else
4971 DEBUG(dbgs() << "Illegal fp imm " << ImmStrVal << ": unsupported fp type\n");
4972
4973 return false;
4974}
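
For orientation (editorial note, figures taken from the FMOV (immediate) encoding rather than this file): getFP64Imm/getFP32Imm/getFP16Imm accept values of the form +/-(m/16) * 2^e with m in [16, 31] and e in [-3, 4], i.e. magnitudes from 0.125 up to 31.0. So, for example, 1.0, -2.5 and 31.0 report as legal immediates here, while 0.1 or 100.0 take the "Illegal" path and are materialized some other way.
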
4975
4976//===----------------------------------------------------------------------===//
4977// AArch64 Optimization Hooks
4978//===----------------------------------------------------------------------===//
4979
4980static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
4981 SDValue Operand, SelectionDAG &DAG,
4982 int &ExtraSteps) {
4983 EVT VT = Operand.getValueType();
4984 if (ST->hasNEON() &&
4985 (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
4986 VT == MVT::f32 || VT == MVT::v1f32 ||
4987 VT == MVT::v2f32 || VT == MVT::v4f32)) {
4988 if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
4989 // For the reciprocal estimates, convergence is quadratic, so the number
4990 // of digits is doubled after each iteration. In ARMv8, the accuracy of
4991 // the initial estimate is 2^-8. Thus the number of extra steps to refine
4992 // the result for float (23 mantissa bits) is 2 and for double (52
4993 // mantissa bits) is 3.
4994 ExtraSteps = VT.getScalarType() == MVT::f64 ? 3 : 2;
4995
4996 return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
4997 }
4998
4999 return SDValue();
5000}
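
A small worked check of the hard-coded step counts above (editorial; the helper below is hypothetical and not in LLVM): the hardware estimate provides roughly 8 correct bits and each refinement step doubles that, so 23 mantissa bits need 2 extra steps and 52 need 3.

// Doubling 8 bits per step: 8 -> 16 -> 32 covers f32 (23 bits) in 2 steps,
// and 8 -> 16 -> 32 -> 64 covers f64 (52 bits) in 3 steps.
static int refinementSteps(int MantissaBits) {
  int Steps = 0;
  for (int Bits = 8; Bits < MantissaBits; Bits *= 2)
    ++Steps;
  return Steps;
}
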
5001
5002SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
5003 SelectionDAG &DAG, int Enabled,
5004 int &ExtraSteps,
5005 bool &UseOneConst,
5006 bool Reciprocal) const {
5007 if (Enabled == ReciprocalEstimate::Enabled ||
5008 (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
5009 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
5010 DAG, ExtraSteps)) {
5011 SDLoc DL(Operand);
5012 EVT VT = Operand.getValueType();
5013
5014 SDNodeFlags Flags;
5015 Flags.setUnsafeAlgebra(true);
5016
5017 // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
5018 // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
5019 for (int i = ExtraSteps; i > 0; --i) {
5020 SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
5021 Flags);
5022 Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, Flags);
5023 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5024 }
5025 if (!Reciprocal) {
5026 EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
5027 VT);
5028 SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
5029 SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
5030
5031 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, Flags);
5032 // Correct the result if the operand is 0.0.
5033 Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
5034 VT, Eq, Operand, Estimate);
5035 }
5036
5037 ExtraSteps = 0;
5038 return Estimate;
5039 }
5040
5041 return SDValue();
5042}
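
A hedged scalar rendering of the refinement loop above (illustrative only, f32 flavour): FRSQRTS produces 0.5 * (3 - M * N), so each iteration is the Newton step E' = E * 0.5 * (3 - X * E^2); the non-reciprocal path then multiplies by X and patches the X == 0 case.

static float refineRsqrt(float X, float E, int ExtraSteps) {
  for (int i = 0; i < ExtraSteps; ++i) {
    float Step = 0.5f * (3.0f - X * (E * E)); // what FRSQRTS(X, E*E) computes
    E *= Step;
  }
  return E; // ~ 1/sqrt(X); multiply by X (and fix X == 0) to get sqrt(X)
}
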
5043
5044SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
5045 SelectionDAG &DAG, int Enabled,
5046 int &ExtraSteps) const {
5047 if (Enabled == ReciprocalEstimate::Enabled)
5048 if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
5049 DAG, ExtraSteps)) {
5050 SDLoc DL(Operand);
5051 EVT VT = Operand.getValueType();
5052
5053 SDNodeFlags Flags;
5054 Flags.setUnsafeAlgebra(true);
5055
5056 // Newton reciprocal iteration: E * (2 - X * E)
5057 // AArch64 reciprocal iteration instruction: (2 - M * N)
5058 for (int i = ExtraSteps; i > 0; --i) {
5059 SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
5060 Estimate, Flags);
5061 Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, Flags);
5062 }
5063
5064 ExtraSteps = 0;
5065 return Estimate;
5066 }
5067
5068 return SDValue();
5069}
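
And the scalar counterpart of the loop above (again illustrative only): FRECPS produces 2 - M * N, giving the Newton step E' = E * (2 - X * E).

static float refineRecip(float X, float E, int ExtraSteps) {
  for (int i = 0; i < ExtraSteps; ++i)
    E *= (2.0f - X * E); // what FRECPS(X, E) computes, folded into the FMUL
  return E;              // ~ 1/X
}
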
5070
5071//===----------------------------------------------------------------------===//
5072// AArch64 Inline Assembly Support
5073//===----------------------------------------------------------------------===//
5074
5075// Table of Constraints
5076// TODO: This is the current set of constraints supported by ARM for the
5077// compiler; not all of them may make sense, e.g. S may be difficult to support.
5078//
5079// r - A general register
5080// w - An FP/SIMD register of some size in the range v0-v31
5081// x - An FP/SIMD register of some size in the range v0-v15
5082// I - Constant that can be used with an ADD instruction
5083// J - Constant that can be used with a SUB instruction
5084// K - Constant that can be used with a 32-bit logical instruction
5085// L - Constant that can be used with a 64-bit logical instruction
5086// M - Constant that can be used as a 32-bit MOV immediate
5087// N - Constant that can be used as a 64-bit MOV immediate
5088// Q - A memory reference with base register and no offset
5089// S - A symbolic address
5090// Y - Floating point constant zero
5091// Z - Integer constant zero
5092//
5093// Note that general register operands will be output using their 64-bit x
5094// register name, whatever the size of the variable, unless the asm operand
5095// is prefixed by the %w modifier. Floating-point and SIMD register operands
5096// will be output with the v prefix unless prefixed by the %b, %h, %s, %d or
5097// %q modifier.
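
An illustrative (editorial) use of two of these constraints from C; the hooks below validate such operands, and 4095 is merely the largest unshifted ADD immediate.

static long add4095(long x) {
  long r;
  // "r": general register (printed as an x-register for a 64-bit operand);
  // "I": an ADD-style immediate, 0..4095, optionally shifted left by 12.
  __asm__("add %0, %1, %2" : "=r"(r) : "r"(x), "I"(4095L));
  return r;
}
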
5098const char *AArch64TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
5099 // At this point, we have to lower this constraint to something else, so we
5100 // lower it to an "r" or "w". However, by doing this we will force the result
5101 // to be in register, while the X constraint is much more permissive.
5102 //
5103 // Although we are correct (we are free to emit anything, without
5104 // constraints), we might break use cases that would expect us to be more
5105 // efficient and emit something else.
5106 if (!Subtarget->hasFPARMv8())
5107 return "r";
5108
5109 if (ConstraintVT.isFloatingPoint())
5110 return "w";
5111
5112 if (ConstraintVT.isVector() &&
5113 (ConstraintVT.getSizeInBits() == 64 ||
5114 ConstraintVT.getSizeInBits() == 128))
5115 return "w";
5116
5117 return "r";
5118}
5119
5120/// getConstraintType - Given a constraint letter, return the type of
5121/// constraint it is for this target.
5122AArch64TargetLowering::ConstraintType
5123AArch64TargetLowering::getConstraintType(StringRef Constraint) const {
5124 if (Constraint.size() == 1) {
5125 switch (Constraint[0]) {
5126 default:
5127 break;
5128 case 'z':
5129 return C_Other;
5130 case 'x':
5131 case 'w':
5132 return C_RegisterClass;
5133 // An address with a single base register. Due to the way we
5134 // currently handle addresses it is the same as 'r'.
5135 case 'Q':
5136 return C_Memory;
5137 }
5138 }
5139 return TargetLowering::getConstraintType(Constraint);
5140}
5141
5142/// Examine constraint type and operand type and determine a weight value.
5143/// This object must already have been set up with the operand type
5144/// and the current alternative constraint selected.
5145TargetLowering::ConstraintWeight
5146AArch64TargetLowering::getSingleConstraintMatchWeight(
5147 AsmOperandInfo &info, const char *constraint) const {
5148 ConstraintWeight weight = CW_Invalid;
5149 Value *CallOperandVal = info.CallOperandVal;
5150 // If we don't have a value, we can't do a match,
5151 // but allow it at the lowest weight.
5152 if (!CallOperandVal)
5153 return CW_Default;
5154 Type *type = CallOperandVal->getType();
5155 // Look at the constraint type.
5156 switch (*constraint) {
5157 default:
5158 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
5159 break;
5160 case 'x':
5161 case 'w':
5162 if (type->isFloatingPointTy() || type->isVectorTy())
5163 weight = CW_Register;
5164 break;
5165 case 'z':
5166 weight = CW_Constant;
5167 break;
5168 }
5169 return weight;
5170}
5171
5172std::pair<unsigned, const TargetRegisterClass *>
5173AArch64TargetLowering::getRegForInlineAsmConstraint(
5174 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
5175 if (Constraint.size() == 1) {
5176 switch (Constraint[0]) {
5177 case 'r':
5178 if (VT.getSizeInBits() == 64)
5179 return std::make_pair(0U, &AArch64::GPR64commonRegClass);
5180 return std::make_pair(0U, &AArch64::GPR32commonRegClass);
5181 case 'w':
5182 if (VT.getSizeInBits() == 16)
5183 return std::make_pair(0U, &AArch64::FPR16RegClass);
5184 if (VT.getSizeInBits() == 32)
5185 return std::make_pair(0U, &AArch64::FPR32RegClass);
5186 if (VT.getSizeInBits() == 64)
5187 return std::make_pair(0U, &AArch64::FPR64RegClass);
5188 if (VT.getSizeInBits() == 128)
5189 return std::make_pair(0U, &AArch64::FPR128RegClass);
5190 break;
5191 // The instructions that this constraint is designed for can
5192 // only take 128-bit registers so just use that regclass.
5193 case 'x':
5194 if (VT.getSizeInBits() == 128)
5195 return std::make_pair(0U, &AArch64::FPR128_loRegClass);
5196 break;
5197 }
5198 }
5199 if (StringRef("{cc}").equals_lower(Constraint))
5200 return std::make_pair(unsigned(AArch64::NZCV), &AArch64::CCRRegClass);
5201
5202 // Use the default implementation in TargetLowering to convert the register
5203 // constraint into a member of a register class.
5204 std::pair<unsigned, const TargetRegisterClass *> Res;
5205 Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
5206
5207 // Not found as a standard register?
5208 if (!Res.second) {
5209 unsigned Size = Constraint.size();
5210 if ((Size == 4 || Size == 5) && Constraint[0] == '{' &&
5211 tolower(Constraint[1]) == 'v' && Constraint[Size - 1] == '}') {
5212 int RegNo;
5213 bool Failed = Constraint.slice(2, Size - 1).getAsInteger(10, RegNo);
5214 if (!Failed && RegNo >= 0 && RegNo <= 31) {
5215 // v0 - v31 are aliases of q0 - q31 or d0 - d31 depending on size.
5216 // By default we'll emit v0-v31 for this unless there's a modifier where
5217 // we'll emit the correct register as well.
5218 if (VT != MVT::Other && VT.getSizeInBits() == 64) {
5219 Res.first = AArch64::FPR64RegClass.getRegister(RegNo);
5220 Res.second = &AArch64::FPR64RegClass;
5221 } else {
5222 Res.first = AArch64::FPR128RegClass.getRegister(RegNo);
5223 Res.second = &AArch64::FPR128RegClass;
5224 }
5225 }
5226 }
5227 }
5228
5229 return Res;
5230}
5231
5232/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
5233/// vector. If it is invalid, don't add anything to Ops.
5234void AArch64TargetLowering::LowerAsmOperandForConstraint(
5235 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
5236 SelectionDAG &DAG) const {
5237 SDValue Result;
5238
5239 // Currently only support length 1 constraints.
5240 if (Constraint.length() != 1)
5241 return;
5242
5243 char ConstraintLetter = Constraint[0];
5244 switch (ConstraintLetter) {
5245 default:
5246 break;
5247
5248 // This set of constraints deals with valid constants for various instructions.
5249 // Validate and return a target constant for them if we can.
5250 case 'z': {
5251 // 'z' maps to xzr or wzr so it needs an input of 0.
5252 if (!isNullConstant(Op))
5253 return;
5254
5255 if (Op.getValueType() == MVT::i64)
5256 Result = DAG.getRegister(AArch64::XZR, MVT::i64);
5257 else
5258 Result = DAG.getRegister(AArch64::WZR, MVT::i32);
5259 break;
5260 }
5261
5262 case 'I':
5263 case 'J':
5264 case 'K':
5265 case 'L':
5266 case 'M':
5267 case 'N':
5268 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
5269 if (!C)
5270 return;
5271
5272 // Grab the value and do some validation.
5273 uint64_t CVal = C->getZExtValue();
5274 switch (ConstraintLetter) {
5275 // The I constraint applies only to simple ADD or SUB immediate operands:
5276 // i.e. 0 to 4095 with optional shift by 12
5277 // The J constraint applies only to ADD or SUB immediates that would be
5278 // valid when negated, i.e. if [an add pattern] were to be output as a SUB
5279 // instruction [or vice versa], in other words -1 to -4095 with optional
5280 // left shift by 12.
5281 case 'I':
5282 if (isUInt<12>(CVal) || isShiftedUInt<12, 12>(CVal))
5283 break;
5284 return;
5285 case 'J': {
5286 uint64_t NVal = -C->getSExtValue();
5287 if (isUInt<12>(NVal) || isShiftedUInt<12, 12>(NVal)) {
5288 CVal = C->getSExtValue();
5289 break;
5290 }
5291 return;
5292 }
5293 // The K and L constraints apply *only* to logical immediates, including
5294 // what used to be the MOVI alias for ORR (though the MOVI alias has now
5295 // been removed and MOV should be used). So these constraints have to
5296 // distinguish between bit patterns that are valid 32-bit or 64-bit
5297 // "bitmask immediates": for example 0xaaaaaaaa is a valid bimm32 (K), but
5298 // not a valid bimm64 (L) where 0xaaaaaaaaaaaaaaaa would be valid, and vice
5299 // versa.
5300 case 'K':
5301 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5302 break;
5303 return;
5304 case 'L':
5305 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5306 break;
5307 return;
5308 // The M and N constraints are a superset of K and L respectively, for use
5309 // with the MOV (immediate) alias. As well as the logical immediates they
5310 // also match 32 or 64-bit immediates that can be loaded either using a
5311 // *single* MOVZ or MOVN, such as 32-bit 0x12340000, 0x00001234, 0xffffedca
5312 // (M) or 64-bit 0x1234000000000000 (N) etc.
5313 // As a note some of this code is liberally stolen from the asm parser.
5314 case 'M': {
5315 if (!isUInt<32>(CVal))
5316 return;
5317 if (AArch64_AM::isLogicalImmediate(CVal, 32))
5318 break;
5319 if ((CVal & 0xFFFF) == CVal)
5320 break;
5321 if ((CVal & 0xFFFF0000ULL) == CVal)
5322 break;
5323 uint64_t NCVal = ~(uint32_t)CVal;
5324 if ((NCVal & 0xFFFFULL) == NCVal)
5325 break;
5326 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5327 break;
5328 return;
5329 }
5330 case 'N': {
5331 if (AArch64_AM::isLogicalImmediate(CVal, 64))
5332 break;
5333 if ((CVal & 0xFFFFULL) == CVal)
5334 break;
5335 if ((CVal & 0xFFFF0000ULL) == CVal)
5336 break;
5337 if ((CVal & 0xFFFF00000000ULL) == CVal)
5338 break;
5339 if ((CVal & 0xFFFF000000000000ULL) == CVal)
5340 break;
5341 uint64_t NCVal = ~CVal;
5342 if ((NCVal & 0xFFFFULL) == NCVal)
5343 break;
5344 if ((NCVal & 0xFFFF0000ULL) == NCVal)
5345 break;
5346 if ((NCVal & 0xFFFF00000000ULL) == NCVal)
5347 break;
5348 if ((NCVal & 0xFFFF000000000000ULL) == NCVal)
5349 break;
5350 return;
5351 }
5352 default:
5353 return;
5354 }
5355
5356 // All assembler immediates are 64-bit integers.
5357 Result = DAG.getTargetConstant(CVal, SDLoc(Op), MVT::i64);
5358 break;
5359 }
5360
5361 if (Result.getNode()) {
5362 Ops.push_back(Result);
5363 return;
5364 }
5365
5366 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
5367}
5368
5369//===----------------------------------------------------------------------===//
5370// AArch64 Advanced SIMD Support
5371//===----------------------------------------------------------------------===//
5372
5373/// WidenVector - Given a value in the V64 register class, produce the
5374/// equivalent value in the V128 register class.
5375static SDValue WidenVector(SDValue V64Reg, SelectionDAG &DAG) {
5376 EVT VT = V64Reg.getValueType();
5377 unsigned NarrowSize = VT.getVectorNumElements();
5378 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5379 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
5380 SDLoc DL(V64Reg);
5381
5382 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideTy, DAG.getUNDEF(WideTy),
5383 V64Reg, DAG.getConstant(0, DL, MVT::i32));
5384}
5385
5386/// getExtFactor - Determine the adjustment factor for the position when
5387/// generating an "extract from vector registers" instruction.
5388static unsigned getExtFactor(SDValue &V) {
5389 EVT EltType = V.getValueType().getVectorElementType();
5390 return EltType.getSizeInBits() / 8;
5391}
5392
5393/// NarrowVector - Given a value in the V128 register class, produce the
5394/// equivalent value in the V64 register class.
5395static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
5396 EVT VT = V128Reg.getValueType();
5397 unsigned WideSize = VT.getVectorNumElements();
5398 MVT EltTy = VT.getVectorElementType().getSimpleVT();
5399 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
5400 SDLoc DL(V128Reg);
5401
5402 return DAG.getTargetExtractSubreg(AArch64::dsub, DL, NarrowTy, V128Reg);
5403}
5404
5405// Gather data to see if the operation can be modelled as a
5406// shuffle in combination with VEXTs.
5407SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
5408 SelectionDAG &DAG) const {
5409 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
5410 DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
5411 SDLoc dl(Op);
5412 EVT VT = Op.getValueType();
5413 unsigned NumElts = VT.getVectorNumElements();
5414
5415 struct ShuffleSourceInfo {
5416 SDValue Vec;
5417 unsigned MinElt;
5418 unsigned MaxElt;
5419
5420 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
5421 // be compatible with the shuffle we intend to construct. As a result
5422 // ShuffleVec will be some sliding window into the original Vec.
5423 SDValue ShuffleVec;
5424
5425 // Code should guarantee that element i in Vec starts at element "WindowBase
5426 // + i * WindowScale" in ShuffleVec.
5427 int WindowBase;
5428 int WindowScale;
5429
5430 ShuffleSourceInfo(SDValue Vec)
5431 : Vec(Vec), MinElt(std::numeric_limits<unsigned>::max()), MaxElt(0),
5432 ShuffleVec(Vec), WindowBase(0), WindowScale(1) {}
5433
5434 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
5435 };
5436
5437 // First gather all vectors used as an immediate source for this BUILD_VECTOR
5438 // node.
5439 SmallVector<ShuffleSourceInfo, 2> Sources;
5440 for (unsigned i = 0; i < NumElts; ++i) {
5441 SDValue V = Op.getOperand(i);
5442 if (V.isUndef())
5443 continue;
5444 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
5445 !isa<ConstantSDNode>(V.getOperand(1))) {
5446 DEBUG(dbgs() << "Reshuffle failed: "
5447 "a shuffle can only come from building a vector from "
5448 "various elements of other vectors, provided their "
5449 "indices are constant\n");
5450 return SDValue();
5451 }
5452
5453 // Add this element source to the list if it's not already there.
5454 SDValue SourceVec = V.getOperand(0);
5455 auto Source = find(Sources, SourceVec);
5456 if (Source == Sources.end())
5457 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
5458
5459 // Update the minimum and maximum lane number seen.
5460 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
5461 Source->MinElt = std::min(Source->MinElt, EltNo);
5462 Source->MaxElt = std::max(Source->MaxElt, EltNo);
5463 }
5464
5465 if (Sources.size() > 2) {
5466 DEBUG(dbgs() << "Reshuffle failed: currently only do something sane when at "
5467 "most two source vectors are involved\n");
5468 return SDValue();
5469 }
5470
5471 // Find out the smallest element size among result and two sources, and use
5472 // it as element size to build the shuffle_vector.
5473 EVT SmallestEltTy = VT.getVectorElementType();
5474 for (auto &Source : Sources) {
5475 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
5476 if (SrcEltTy.bitsLT(SmallestEltTy)) {
5477 SmallestEltTy = SrcEltTy;
5478 }
5479 }
5480 unsigned ResMultiplier =
5481 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
5482 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
5483 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
5484
5485 // If the source vector is too wide or too narrow, we may nevertheless be able
5486 // to construct a compatible shuffle either by concatenating it with UNDEF or
5487 // extracting a suitable range of elements.
5488 for (auto &Src : Sources) {
5489 EVT SrcVT = Src.ShuffleVec.getValueType();
5490
5491 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
5492 continue;
5493
5494 // This stage of the search produces a source with the same element type as
5495 // the original, but with a total width matching the BUILD_VECTOR output.
5496 EVT EltVT = SrcVT.getVectorElementType();
5497 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
5498 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
5499
5500 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
5501 assert(2 * SrcVT.getSizeInBits() == VT.getSizeInBits());
5502 // We can pad out the smaller vector for free, so if it's part of a
5503 // shuffle...
5504 Src.ShuffleVec =
5505 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
5506 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
5507 continue;
5508 }
5509
5510 assert(SrcVT.getSizeInBits() == 2 * VT.getSizeInBits());
5511
5512 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
5513 DEBUG(dbgs() << "Reshuffle failed: span too large for a VEXT to cope\n");
5514 return SDValue();
5515 }
5516
5517 if (Src.MinElt >= NumSrcElts) {
5518 // The extraction can just take the second half
5519 Src.ShuffleVec =
5520 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5521 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5522 Src.WindowBase = -NumSrcElts;
5523 } else if (Src.MaxElt < NumSrcElts) {
5524 // The extraction can just take the first half
5525 Src.ShuffleVec =
5526 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5527 DAG.getConstant(0, dl, MVT::i64));
5528 } else {
5529 // An actual VEXT is needed
5530 SDValue VEXTSrc1 =
5531 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5532 DAG.getConstant(0, dl, MVT::i64));
5533 SDValue VEXTSrc2 =
5534 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
5535 DAG.getConstant(NumSrcElts, dl, MVT::i64));
5536 unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
5537
5538 Src.ShuffleVec = DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1,
5539 VEXTSrc2,
5540 DAG.getConstant(Imm, dl, MVT::i32));
5541 Src.WindowBase = -Src.MinElt;
5542 }
5543 }
5544
5545 // Another possible incompatibility occurs from the vector element types. We
5546 // can fix this by bitcasting the source vectors to the same type we intend
5547 // for the shuffle.
5548 for (auto &Src : Sources) {
5549 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
5550 if (SrcEltTy == SmallestEltTy)
5551 continue;
5552 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
5553 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
5554 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
5555 Src.WindowBase *= Src.WindowScale;
5556 }
5557
5558 // Final sanity check before we try to actually produce a shuffle.
5559 DEBUG(
5560 for (auto Src : Sources)
5561 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
5562 );
5563
5564 // The stars all align, our next step is to produce the mask for the shuffle.
5565 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
5566 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
5567 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
5568 SDValue Entry = Op.getOperand(i);
5569 if (Entry.isUndef())
5570 continue;
5571
5572 auto Src = find(Sources, Entry.getOperand(0));
5573 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
5574
5575 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
5576 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
5577 // segment.
5578 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
5579 int BitsDefined =
5580 std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
5581 int LanesDefined = BitsDefined / BitsPerShuffleLane;
5582
5583 // This source is expected to fill ResMultiplier lanes of the final shuffle,
5584 // starting at the appropriate offset.
5585 int *LaneMask = &Mask[i * ResMultiplier];
5586
5587 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
5588 ExtractBase += NumElts * (Src - Sources.begin());
5589 for (int j = 0; j < LanesDefined; ++j)
5590 LaneMask[j] = ExtractBase + j;
5591 }
5592
5593 // Final check before we try to produce nonsense...
5594 if (!isShuffleMaskLegal(Mask, ShuffleVT)) {
5595 DEBUG(dbgs() << "Reshuffle failed: illegal shuffle mask\n");
5596 return SDValue();
5597 }
5598
5599 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
5600 for (unsigned i = 0; i < Sources.size(); ++i)
5601 ShuffleOps[i] = Sources[i].ShuffleVec;
5602
5603 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
5604 ShuffleOps[1], Mask);
5605 SDValue V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
5606
5607 DEBUG(
5608 dbgs() << "Reshuffle, creating node: ";
5609 Shuffle.dump();
5610 dbgs() << "Reshuffle, creating node: ";
5611 V.dump();
5612 );
5613
5614 return V;
5615}
5616
5617// Check if an EXT instruction can handle the shuffle mask when the
5618// vector sources of the shuffle are the same.
5619static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5620 unsigned NumElts = VT.getVectorNumElements();
5621
5622 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5623 if (M[0] < 0)
5624 return false;
5625
5626 Imm = M[0];
5627
5628 // If this is a VEXT shuffle, the immediate value is the index of the first
5629 // element. The other shuffle indices must be the successive elements after
5630 // the first one.
5631 unsigned ExpectedElt = Imm;
5632 for (unsigned i = 1; i < NumElts; ++i) {
5633 // Increment the expected index. If it wraps around, just follow it
5634 // back to index zero and keep going.
5635 ++ExpectedElt;
5636 if (ExpectedElt == NumElts)
5637 ExpectedElt = 0;
5638
5639 if (M[i] < 0)
5640 continue; // ignore UNDEF indices
5641 if (ExpectedElt != static_cast<unsigned>(M[i]))
5642 return false;
5643 }
5644
5645 return true;
5646}
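
A concrete instance (editorial): for a v8i8 shuffle of a vector with itself, the mask <2, 3, 4, 5, 6, 7, 0, 1> wraps around after index 7 and is accepted with Imm = 2, i.e. a single EXT of the register with itself starting at byte 2.
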
5647
5648// Check if an EXT instruction can handle the shuffle mask when the
5649// vector sources of the shuffle are different.
5650static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
5651 unsigned &Imm) {
5652 // Look for the first non-undef element.
5653 const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
5654
5655 // Benefit from APInt to handle overflow when calculating the expected element.
5656 unsigned NumElts = VT.getVectorNumElements();
5657 unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
5658 APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);
5659 // The following shuffle indices must be the successive elements after the
5660 // first real element.
5661 const int *FirstWrongElt = std::find_if(FirstRealElt + 1, M.end(),
5662 [&](int Elt) {return Elt != ExpectedElt++ && Elt != -1;});
5663 if (FirstWrongElt != M.end())
5664 return false;
5665
5666 // The index of an EXT is the first element if it is not UNDEF.
5667 // Watch out for the beginning UNDEFs. The EXT index should be the expected
5668 // value of the first element. E.g.
5669 // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
5670 // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
5671 // ExpectedElt is the last mask index plus 1.
5672 Imm = ExpectedElt.getZExtValue();
5673
5674 // There are two different cases that require reversing the input vectors.
5675 // For example, for vector <4 x i32> we have the following cases,
5676 // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
5677 // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
5678 // For both cases, we finally use mask <5, 6, 7, 0>, which requires
5679 // reversing the two input vectors.
5680 if (Imm < NumElts)
5681 ReverseEXT = true;
5682 else
5683 Imm -= NumElts;
5684
5685 return true;
5686}
5687
5688/// isREVMask - Check if a vector shuffle corresponds to a REV
5689/// instruction with the specified blocksize. (The order of the elements
5690/// within each block of the vector is reversed.)
5691static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5692 assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
5693 "Only possible block sizes for REV are: 16, 32, 64");
5694
5695 unsigned EltSz = VT.getScalarSizeInBits();
5696 if (EltSz == 64)
5697 return false;
5698
5699 unsigned NumElts = VT.getVectorNumElements();
5700 unsigned BlockElts = M[0] + 1;
5701 // If the first shuffle index is UNDEF, be optimistic.
5702 if (M[0] < 0)
5703 BlockElts = BlockSize / EltSz;
5704
5705 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5706 return false;
5707
5708 for (unsigned i = 0; i < NumElts; ++i) {
5709 if (M[i] < 0)
5710 continue; // ignore UNDEF indices
5711 if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
5712 return false;
5713 }
5714
5715 return true;
5716}
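
For example (editorial), with VT = v8i8 and BlockSize = 32, BlockElts is 4 and the accepted mask is <3, 2, 1, 0, 7, 6, 5, 4>: each 32-bit block has its byte order reversed, which is what REV32 with byte elements produces.
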
5717
5718static bool isZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5719 unsigned NumElts = VT.getVectorNumElements();
5720 WhichResult = (M[0] == 0 ? 0 : 1);
5721 unsigned Idx = WhichResult * NumElts / 2;
5722 for (unsigned i = 0; i != NumElts; i += 2) {
5723 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5724 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx + NumElts))
5725 return false;
5726 Idx += 1;
5727 }
5728
5729 return true;
5730}
5731
5732static bool isUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5733 unsigned NumElts = VT.getVectorNumElements();
5734 WhichResult = (M[0] == 0 ? 0 : 1);
5735 for (unsigned i = 0; i != NumElts; ++i) {
5736 if (M[i] < 0)
5737 continue; // ignore UNDEF indices
5738 if ((unsigned)M[i] != 2 * i + WhichResult)
5739 return false;
5740 }
5741
5742 return true;
5743}
5744
5745static bool isTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5746 unsigned NumElts = VT.getVectorNumElements();
5747 WhichResult = (M[0] == 0 ? 0 : 1);
5748 for (unsigned i = 0; i < NumElts; i += 2) {
5749 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5750 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + NumElts + WhichResult))
5751 return false;
5752 }
5753 return true;
5754}
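
Concrete v4i32 instances of the three predicates above (editorial): isZIPMask accepts <0, 4, 1, 5> (ZIP1, WhichResult = 0) and <2, 6, 3, 7> (ZIP2); isUZPMask accepts <0, 2, 4, 6> (UZP1) and <1, 3, 5, 7> (UZP2); isTRNMask accepts <0, 4, 2, 6> (TRN1) and <1, 5, 3, 7> (TRN2).
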
5755
5756/// isZIP_v_undef_Mask - Special case of isZIPMask for canonical form of
5757/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5758/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5759static bool isZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5760 unsigned NumElts = VT.getVectorNumElements();
5761 WhichResult = (M[0] == 0 ? 0 : 1);
5762 unsigned Idx = WhichResult * NumElts / 2;
5763 for (unsigned i = 0; i != NumElts; i += 2) {
5764 if ((M[i] >= 0 && (unsigned)M[i] != Idx) ||
5765 (M[i + 1] >= 0 && (unsigned)M[i + 1] != Idx))
5766 return false;
5767 Idx += 1;
5768 }
5769
5770 return true;
5771}
5772
5773/// isUZP_v_undef_Mask - Special case of isUZPMask for canonical form of
5774/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5775/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5776static bool isUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5777 unsigned Half = VT.getVectorNumElements() / 2;
5778 WhichResult = (M[0] == 0 ? 0 : 1);
5779 for (unsigned j = 0; j != 2; ++j) {
5780 unsigned Idx = WhichResult;
5781 for (unsigned i = 0; i != Half; ++i) {
5782 int MIdx = M[i + j * Half];
5783 if (MIdx >= 0 && (unsigned)MIdx != Idx)
5784 return false;
5785 Idx += 2;
5786 }
5787 }
5788
5789 return true;
5790}
5791
5792/// isTRN_v_undef_Mask - Special case of isTRNMask for canonical form of
5793/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5794/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5795static bool isTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5796 unsigned NumElts = VT.getVectorNumElements();
5797 WhichResult = (M[0] == 0 ? 0 : 1);
5798 for (unsigned i = 0; i < NumElts; i += 2) {
5799 if ((M[i] >= 0 && (unsigned)M[i] != i + WhichResult) ||
5800 (M[i + 1] >= 0 && (unsigned)M[i + 1] != i + WhichResult))
5801 return false;
5802 }
5803 return true;
5804}
5805
5806static bool isINSMask(ArrayRef<int> M, int NumInputElements,
5807 bool &DstIsLeft, int &Anomaly) {
5808 if (M.size() != static_cast<size_t>(NumInputElements))
5809 return false;
5810
5811 int NumLHSMatch = 0, NumRHSMatch = 0;
5812 int LastLHSMismatch = -1, LastRHSMismatch = -1;
5813
5814 for (int i = 0; i < NumInputElements; ++i) {
5815 if (M[i] == -1) {
5816 ++NumLHSMatch;
5817 ++NumRHSMatch;
5818 continue;
5819 }
5820
5821 if (M[i] == i)
5822 ++NumLHSMatch;
5823 else
5824 LastLHSMismatch = i;
5825
5826 if (M[i] == i + NumInputElements)
5827 ++NumRHSMatch;
5828 else
5829 LastRHSMismatch = i;
5830 }
5831
5832 if (NumLHSMatch == NumInputElements - 1) {
5833 DstIsLeft = true;
5834 Anomaly = LastLHSMismatch;
5835 return true;
5836 } else if (NumRHSMatch == NumInputElements - 1) {
5837 DstIsLeft = false;
5838 Anomaly = LastRHSMismatch;
5839 return true;
5840 }
5841
5842 return false;
5843}
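
As a worked example (editorial), for four input elements the mask <0, 1, 6, 3> matches the left-hand side everywhere except lane 2, so the predicate returns true with DstIsLeft = true and Anomaly = 2; the caller can then insert element 2 of the RHS (index 6 - 4) into lane 2 of the LHS.
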
5844
5845static bool isConcatMask(ArrayRef<int> Mask, EVT VT, bool SplitLHS) {
5846 if (VT.getSizeInBits() != 128)
5847 return false;
5848
5849 unsigned NumElts = VT.getVectorNumElements();
5850
5851 for (int I = 0, E = NumElts / 2; I != E; I++) {
5852 if (Mask[I] != I)
5853 return false;
5854 }
5855
5856 int Offset = NumElts / 2;
5857 for (int I = NumElts / 2, E = NumElts; I != E; I++) {
5858 if (Mask[I] != I + SplitLHS * Offset)
5859 return false;
5860 }
5861
5862 return true;
5863}
5864
5865static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
5866 SDLoc DL(Op);
5867 EVT VT = Op.getValueType();
5868 SDValue V0 = Op.getOperand(0);
5869 SDValue V1 = Op.getOperand(1);
5870 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op)->getMask();
5871
5872 if (VT.getVectorElementType() != V0.getValueType().getVectorElementType() ||
5873 VT.getVectorElementType() != V1.getValueType().getVectorElementType())
5874 return SDValue();
5875
5876 bool SplitV0 = V0.getValueSizeInBits() == 128;
5877
5878 if (!isConcatMask(Mask, VT, SplitV0))
5879 return SDValue();
5880
5881 EVT CastVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
5882 VT.getVectorNumElements() / 2);
5883 if (SplitV0) {
5884 V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
5885 DAG.getConstant(0, DL, MVT::i64));
5886 }
5887 if (V1.getValueSizeInBits() == 128) {
5888 V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
5889 DAG.getConstant(0, DL, MVT::i64));
5890 }
5891 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, V0, V1);
5892}
5893
5894/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
5895/// the specified operations to build the shuffle.
5896static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
5897 SDValue RHS, SelectionDAG &DAG,
5898 const SDLoc &dl) {
5899 unsigned OpNum = (PFEntry >> 26) & 0x0F;
5900 unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
5901 unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
5902
5903 enum {
5904 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
5905 OP_VREV,
5906 OP_VDUP0,
5907 OP_VDUP1,
5908 OP_VDUP2,
5909 OP_VDUP3,
5910 OP_VEXT1,
5911 OP_VEXT2,
5912 OP_VEXT3,
5913 OP_VUZPL, // VUZP, left result
5914 OP_VUZPR, // VUZP, right result
5915 OP_VZIPL, // VZIP, left result
5916 OP_VZIPR, // VZIP, right result
5917 OP_VTRNL, // VTRN, left result
5918 OP_VTRNR // VTRN, right result
5919 };
5920
5921 if (OpNum == OP_COPY) {
5922 if (LHSID == (1 * 9 + 2) * 9 + 3)
5923 return LHS;
5924 assert(LHSID == ((4 * 9 + 5) * 9 + 6) * 9 + 7 && "Illegal OP_COPY!");
5925 return RHS;
5926 }
5927
5928 SDValue OpLHS, OpRHS;
5929 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
5930 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
5931 EVT VT = OpLHS.getValueType();
5932