Bug Summary

File: lib/Target/ARM/ARMISelLowering.cpp
Warning: line 247, column 20
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/ARM/ARMISelLowering.cpp:247:20)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, Cond, consider reordering the fields or adding explicit padding members
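
The checker's point is purely about field layout: on a typical 64-bit (LP64) host the anonymous struct declares a 4-byte enum (Op), an 8-byte pointer (Name), and another 4-byte enum (Cond), so the compiler inserts 4 bytes of padding before Name and 4 bytes of tail padding, giving 24 bytes per entry where 16 would do. A minimal sketch of the two layouts follows; the stand-in enums are hypothetical substitutes for RTLIB::Libcall and ISD::CondCode, and the sizes assume 8-byte pointers and 4-byte enums.

#include <cstdio>

// Hypothetical 4-byte stand-ins for RTLIB::Libcall and ISD::CondCode.
enum Libcall { ADD_F32 };
enum CondCode { SETCC_INVALID };

// Layout as written at line 247: 4 (Op) + 4 padding + 8 (Name) + 4 (Cond) + 4 tail padding = 24 bytes.
struct AsWritten {
  Libcall Op;
  const char *Name;
  CondCode Cond;
};

// Suggested order Name, Op, Cond: 8 (Name) + 4 (Op) + 4 (Cond) = 16 bytes, no padding.
struct Reordered {
  const char *Name;
  Libcall Op;
  CondCode Cond;
};

int main() {
  // On an LP64 host this typically prints "24 16".
  std::printf("%zu %zu\n", sizeof(AsWritten), sizeof(Reordered));
}

Since LibraryCalls is a small, function-local static table, the extra 8 bytes per entry are mainly a hygiene issue, but reordering the fields (or adding an explicit padding member) costs nothing and silences the warning.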

Annotated Source Code

1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARMISelLowering.h"
16#include "ARMBaseInstrInfo.h"
17#include "ARMBaseRegisterInfo.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSelectionDAGInfo.h"
24#include "ARMSubtarget.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/BitVector.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/STLExtras.h"
33#include "llvm/ADT/SmallPtrSet.h"
34#include "llvm/ADT/SmallVector.h"
35#include "llvm/ADT/Statistic.h"
36#include "llvm/ADT/StringExtras.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/StringSwitch.h"
39#include "llvm/ADT/Triple.h"
40#include "llvm/ADT/Twine.h"
41#include "llvm/Analysis/VectorUtils.h"
42#include "llvm/CodeGen/CallingConvLower.h"
43#include "llvm/CodeGen/ISDOpcodes.h"
44#include "llvm/CodeGen/IntrinsicLowering.h"
45#include "llvm/CodeGen/MachineBasicBlock.h"
46#include "llvm/CodeGen/MachineConstantPool.h"
47#include "llvm/CodeGen/MachineFrameInfo.h"
48#include "llvm/CodeGen/MachineFunction.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineJumpTableInfo.h"
52#include "llvm/CodeGen/MachineMemOperand.h"
53#include "llvm/CodeGen/MachineOperand.h"
54#include "llvm/CodeGen/MachineRegisterInfo.h"
55#include "llvm/CodeGen/MachineValueType.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGNodes.h"
59#include "llvm/CodeGen/ValueTypes.h"
60#include "llvm/IR/Attributes.h"
61#include "llvm/IR/CallingConv.h"
62#include "llvm/IR/Constant.h"
63#include "llvm/IR/Constants.h"
64#include "llvm/IR/DataLayout.h"
65#include "llvm/IR/DebugLoc.h"
66#include "llvm/IR/DerivedTypes.h"
67#include "llvm/IR/Function.h"
68#include "llvm/IR/GlobalAlias.h"
69#include "llvm/IR/GlobalValue.h"
70#include "llvm/IR/GlobalVariable.h"
71#include "llvm/IR/IRBuilder.h"
72#include "llvm/IR/InlineAsm.h"
73#include "llvm/IR/Instruction.h"
74#include "llvm/IR/Instructions.h"
75#include "llvm/IR/IntrinsicInst.h"
76#include "llvm/IR/Intrinsics.h"
77#include "llvm/IR/Module.h"
78#include "llvm/IR/Type.h"
79#include "llvm/IR/User.h"
80#include "llvm/IR/Value.h"
81#include "llvm/MC/MCInstrDesc.h"
82#include "llvm/MC/MCInstrItineraries.h"
83#include "llvm/MC/MCRegisterInfo.h"
84#include "llvm/MC/MCSchedule.h"
85#include "llvm/Support/AtomicOrdering.h"
86#include "llvm/Support/BranchProbability.h"
87#include "llvm/Support/Casting.h"
88#include "llvm/Support/CodeGen.h"
89#include "llvm/Support/CommandLine.h"
90#include "llvm/Support/Compiler.h"
91#include "llvm/Support/Debug.h"
92#include "llvm/Support/ErrorHandling.h"
93#include "llvm/Support/KnownBits.h"
94#include "llvm/Support/MathExtras.h"
95#include "llvm/Support/raw_ostream.h"
96#include "llvm/Target/TargetInstrInfo.h"
97#include "llvm/Target/TargetMachine.h"
98#include "llvm/Target/TargetOptions.h"
99#include <algorithm>
100#include <cassert>
101#include <cstdint>
102#include <cstdlib>
103#include <iterator>
104#include <limits>
105#include <string>
106#include <tuple>
107#include <utility>
108#include <vector>
109
110using namespace llvm;
111
112#define DEBUG_TYPE "arm-isel"
113
114STATISTIC(NumTailCalls, "Number of tail calls");
115STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
116STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
117STATISTIC(NumConstpoolPromoted,
118 "Number of constants with their storage promoted into constant pools");
119
120static cl::opt<bool>
121ARMInterworking("arm-interworking", cl::Hidden,
122 cl::desc("Enable / disable ARM interworking (for debugging only)"),
123 cl::init(true));
124
125static cl::opt<bool> EnableConstpoolPromotion(
126 "arm-promote-constant", cl::Hidden,
127 cl::desc("Enable / disable promotion of unnamed_addr constants into "
128 "constant pools"),
129 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
130static cl::opt<unsigned> ConstpoolPromotionMaxSize(
131 "arm-promote-constant-max-size", cl::Hidden,
132 cl::desc("Maximum size of constant to promote into a constant pool"),
133 cl::init(64));
134static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
135 "arm-promote-constant-max-total", cl::Hidden,
136 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
137 cl::init(128));
138
139// The APCS parameter registers.
140static const MCPhysReg GPRArgRegs[] = {
141 ARM::R0, ARM::R1, ARM::R2, ARM::R3
142};
143
144void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
145 MVT PromotedBitwiseVT) {
146 if (VT != PromotedLdStVT) {
147 setOperationAction(ISD::LOAD, VT, Promote);
148 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
149
150 setOperationAction(ISD::STORE, VT, Promote);
151 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
152 }
153
154 MVT ElemTy = VT.getVectorElementType();
155 if (ElemTy != MVT::f64)
156 setOperationAction(ISD::SETCC, VT, Custom);
157 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
159 if (ElemTy == MVT::i32) {
160 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
161 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
162 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
163 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
164 } else {
165 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
166 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
167 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
168 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
169 }
170 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
171 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
172 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
173 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
174 setOperationAction(ISD::SELECT, VT, Expand);
175 setOperationAction(ISD::SELECT_CC, VT, Expand);
176 setOperationAction(ISD::VSELECT, VT, Expand);
177 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
178 if (VT.isInteger()) {
179 setOperationAction(ISD::SHL, VT, Custom);
180 setOperationAction(ISD::SRA, VT, Custom);
181 setOperationAction(ISD::SRL, VT, Custom);
182 }
183
184 // Promote all bit-wise operations.
185 if (VT.isInteger() && VT != PromotedBitwiseVT) {
186 setOperationAction(ISD::AND, VT, Promote);
187 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
188 setOperationAction(ISD::OR, VT, Promote);
189 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
190 setOperationAction(ISD::XOR, VT, Promote);
191 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
192 }
193
194 // Neon does not support vector divide/remainder operations.
195 setOperationAction(ISD::SDIV, VT, Expand);
196 setOperationAction(ISD::UDIV, VT, Expand);
197 setOperationAction(ISD::FDIV, VT, Expand);
198 setOperationAction(ISD::SREM, VT, Expand);
199 setOperationAction(ISD::UREM, VT, Expand);
200 setOperationAction(ISD::FREM, VT, Expand);
201
202 if (!VT.isFloatingPoint() &&
203 VT != MVT::v2i64 && VT != MVT::v1i64)
204 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
205 setOperationAction(Opcode, VT, Legal);
206}
207
208void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
209 addRegisterClass(VT, &ARM::DPRRegClass);
210 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
211}
212
213void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
214 addRegisterClass(VT, &ARM::DPairRegClass);
215 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
216}
217
218ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
219 const ARMSubtarget &STI)
220 : TargetLowering(TM), Subtarget(&STI) {
221 RegInfo = Subtarget->getRegisterInfo();
222 Itins = Subtarget->getInstrItineraryData();
223
224 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
225
226 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
227 !Subtarget->isTargetWatchOS()) {
228 const auto &E = Subtarget->getTargetTriple().getEnvironment();
229
230 bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
231 E == Triple::MuslEABIHF;
232 // Windows is a special case. Technically, we will replace all of the "GNU"
233 // calls with calls to MSVCRT if appropriate and adjust the calling
234 // convention then.
235 IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
236
237 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
238 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
239 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
240 : CallingConv::ARM_AAPCS);
241 }
242
243 if (Subtarget->isTargetMachO()) {
244 // Uses VFP for Thumb libfuncs if available.
245 if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
246 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
247 static const struct {
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/ARM/ARMISelLowering.cpp:247:20)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, Cond, consider reordering the fields or adding explicit padding members
248 const RTLIB::Libcall Op;
249 const char * const Name;
250 const ISD::CondCode Cond;
251 } LibraryCalls[] = {
252 // Single-precision floating-point arithmetic.
253 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
254 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
255 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
256 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
257
258 // Double-precision floating-point arithmetic.
259 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
260 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
261 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
262 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
263
264 // Single-precision comparisons.
265 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
266 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
267 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
268 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
269 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
270 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
271 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
272 { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
273
274 // Double-precision comparisons.
275 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
276 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
277 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
278 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
279 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
280 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
281 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
282 { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
283
284 // Floating-point to integer conversions.
285 // i64 conversions are done via library routines even when generating VFP
286 // instructions, so use the same ones.
287 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
288 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
289 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
290 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
291
292 // Conversions between floating types.
293 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
294 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
295
296 // Integer to floating-point conversions.
297 // i64 conversions are done via library routines even when generating VFP
298 // instructions, so use the same ones.
299 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
300 // e.g., __floatunsidf vs. __floatunssidfvfp.
301 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
302 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
303 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
304 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
305 };
306
307 for (const auto &LC : LibraryCalls) {
308 setLibcallName(LC.Op, LC.Name);
309 if (LC.Cond != ISD::SETCC_INVALID)
310 setCmpLibcallCC(LC.Op, LC.Cond);
311 }
312 }
313
314 // Set the correct calling convention for ARMv7k WatchOS. It's just
315 // AAPCS_VFP for functions as simple as libcalls.
316 if (Subtarget->isTargetWatchABI()) {
317 for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
318 setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
319 }
320 }
321
322 // These libcalls are not available in 32-bit.
323 setLibcallName(RTLIB::SHL_I128, nullptr);
324 setLibcallName(RTLIB::SRL_I128, nullptr);
325 setLibcallName(RTLIB::SRA_I128, nullptr);
326
327 // RTLIB
328 if (Subtarget->isAAPCS_ABI() &&
329 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
330 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
331 static const struct {
332 const RTLIB::Libcall Op;
333 const char * const Name;
334 const CallingConv::ID CC;
335 const ISD::CondCode Cond;
336 } LibraryCalls[] = {
337 // Double-precision floating-point arithmetic helper functions
338 // RTABI chapter 4.1.2, Table 2
339 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
342 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
343
344 // Double-precision floating-point comparison helper functions
345 // RTABI chapter 4.1.2, Table 3
346 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
347 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
348 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
349 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
350 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
351 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
352 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
353 { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
354
355 // Single-precision floating-point arithmetic helper functions
356 // RTABI chapter 4.1.2, Table 4
357 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
360 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
361
362 // Single-precision floating-point comparison helper functions
363 // RTABI chapter 4.1.2, Table 5
364 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
365 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
366 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
367 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
368 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
369 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
370 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
371 { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
372
373 // Floating-point to integer conversions.
374 // RTABI chapter 4.1.2, Table 6
375 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
382 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
383
384 // Conversions between floating types.
385 // RTABI chapter 4.1.2, Table 7
386 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389
390 // Integer to floating-point conversions.
391 // RTABI chapter 4.1.2, Table 8
392 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
399 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
400
401 // Long long helper functions
402 // RTABI chapter 4.2, Table 9
403 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407
408 // Integer division functions
409 // RTABI chapter 4.3.1
410 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
417 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
418 };
419
420 for (const auto &LC : LibraryCalls) {
421 setLibcallName(LC.Op, LC.Name);
422 setLibcallCallingConv(LC.Op, LC.CC);
423 if (LC.Cond != ISD::SETCC_INVALID)
424 setCmpLibcallCC(LC.Op, LC.Cond);
425 }
426
427 // EABI dependent RTLIB
428 if (TM.Options.EABIVersion == EABI::EABI4 ||
429 TM.Options.EABIVersion == EABI::EABI5) {
430 static const struct {
431 const RTLIB::Libcall Op;
432 const char *const Name;
433 const CallingConv::ID CC;
434 const ISD::CondCode Cond;
435 } MemOpsLibraryCalls[] = {
436 // Memory operations
437 // RTABI chapter 4.3.4
438 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
440 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
441 };
442
443 for (const auto &LC : MemOpsLibraryCalls) {
444 setLibcallName(LC.Op, LC.Name);
445 setLibcallCallingConv(LC.Op, LC.CC);
446 if (LC.Cond != ISD::SETCC_INVALID)
447 setCmpLibcallCC(LC.Op, LC.Cond);
448 }
449 }
450 }
451
452 if (Subtarget->isTargetWindows()) {
453 static const struct {
454 const RTLIB::Libcall Op;
455 const char * const Name;
456 const CallingConv::ID CC;
457 } LibraryCalls[] = {
458 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
459 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
460 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
461 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
462 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
463 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
464 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
465 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
466 };
467
468 for (const auto &LC : LibraryCalls) {
469 setLibcallName(LC.Op, LC.Name);
470 setLibcallCallingConv(LC.Op, LC.CC);
471 }
472 }
473
474 // Use divmod compiler-rt calls for iOS 5.0 and later.
475 if (Subtarget->isTargetMachO() &&
476 !(Subtarget->isTargetIOS() &&
477 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
478 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
479 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
480 }
481
482 // The half <-> float conversion functions are always soft-float on
483 // non-watchos platforms, but are needed for some targets which use a
484 // hard-float calling convention by default.
485 if (!Subtarget->isTargetWatchABI()) {
486 if (Subtarget->isAAPCS_ABI()) {
487 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
488 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
489 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
490 } else {
491 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
492 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
493 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
494 }
495 }
496
497 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
498 // a __gnu_ prefix (which is the default).
499 if (Subtarget->isTargetAEABI()) {
500 static const struct {
501 const RTLIB::Libcall Op;
502 const char * const Name;
503 const CallingConv::ID CC;
504 } LibraryCalls[] = {
505 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
506 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
507 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
508 };
509
510 for (const auto &LC : LibraryCalls) {
511 setLibcallName(LC.Op, LC.Name);
512 setLibcallCallingConv(LC.Op, LC.CC);
513 }
514 }
515
516 if (Subtarget->isThumb1Only())
517 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
518 else
519 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
520
521 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
522 !Subtarget->isThumb1Only()) {
523 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
524 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
525 }
526
527 for (MVT VT : MVT::vector_valuetypes()) {
528 for (MVT InnerVT : MVT::vector_valuetypes()) {
529 setTruncStoreAction(VT, InnerVT, Expand);
530 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
531 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
532 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
533 }
534
535 setOperationAction(ISD::MULHS, VT, Expand);
536 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
537 setOperationAction(ISD::MULHU, VT, Expand);
538 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
539
540 setOperationAction(ISD::BSWAP, VT, Expand);
541 }
542
543 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
544 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
545
546 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
547 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
548
549 if (Subtarget->hasNEON()) {
550 addDRTypeForNEON(MVT::v2f32);
551 addDRTypeForNEON(MVT::v8i8);
552 addDRTypeForNEON(MVT::v4i16);
553 addDRTypeForNEON(MVT::v2i32);
554 addDRTypeForNEON(MVT::v1i64);
555
556 addQRTypeForNEON(MVT::v4f32);
557 addQRTypeForNEON(MVT::v2f64);
558 addQRTypeForNEON(MVT::v16i8);
559 addQRTypeForNEON(MVT::v8i16);
560 addQRTypeForNEON(MVT::v4i32);
561 addQRTypeForNEON(MVT::v2i64);
562
563 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
564 // neither Neon nor VFP supports any arithmetic operations on it.
565 // The same applies to v4f32. But keep in mind that vadd, vsub, vmul are natively
566 // supported for v4f32.
567 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
568 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
569 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
570 // FIXME: Code duplication: FDIV and FREM are expanded always, see
571 // ARMTargetLowering::addTypeForNEON method for details.
572 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
573 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
574 // FIXME: Create unittest.
575 // In other words, find a case where "copysign" appears in a DAG with vector
576 // operands.
577 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
578 // FIXME: Code duplication: SETCC has custom operation action, see
579 // ARMTargetLowering::addTypeForNEON method for details.
580 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
581 // FIXME: Create unittest for FNEG and for FABS.
582 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
583 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
584 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
585 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
586 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
587 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
588 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
589 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
590 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
591 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
592 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
593 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
594 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
595 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
596 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
597 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
598 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
599 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
600
601 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
602 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
603 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
604 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
605 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
606 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
607 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
608 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
609 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
610 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
611 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
612 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
613 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
614 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
615
616 // Mark v2f32 intrinsics.
617 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
618 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
619 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
620 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
621 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
622 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
623 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
624 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
625 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
626 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
627 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
628 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
629 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
630 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
631
632 // Neon does not support some operations on v1i64 and v2i64 types.
633 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
634 // Custom handling for some quad-vector types to detect VMULL.
635 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
636 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
637 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
638 // Custom handling for some vector types to avoid expensive expansions
639 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
640 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
641 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
642 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
643 // Neon does not have single-instruction SINT_TO_FP and UINT_TO_FP with
644 // a destination type that is wider than the source, nor does
645 // it have an FP_TO_[SU]INT instruction with a narrower destination than
646 // source.
647 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
648 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
649 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
650 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
651
652 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
653 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
654
655 // NEON does not have a single-instruction CTPOP for vectors with element
656 // types wider than 8 bits. However, custom lowering can leverage the
657 // v8i8/v16i8 vcnt instruction.
658 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
659 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
660 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
661 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
662 setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
663 setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
664
665 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
666 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
667
668 // NEON does not have single instruction CTTZ for vectors.
669 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
670 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
671 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
672 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
673
674 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
675 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
676 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
677 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
678
679 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
680 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
681 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
682 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
683
684 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
685 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
686 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
687 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
688
689 // NEON only has FMA instructions as of VFP4.
690 if (!Subtarget->hasVFP4()) {
691 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
692 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
693 }
694
695 setTargetDAGCombine(ISD::INTRINSIC_VOID);
696 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
697 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
698 setTargetDAGCombine(ISD::SHL);
699 setTargetDAGCombine(ISD::SRL);
700 setTargetDAGCombine(ISD::SRA);
701 setTargetDAGCombine(ISD::SIGN_EXTEND);
702 setTargetDAGCombine(ISD::ZERO_EXTEND);
703 setTargetDAGCombine(ISD::ANY_EXTEND);
704 setTargetDAGCombine(ISD::BUILD_VECTOR);
705 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
706 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
707 setTargetDAGCombine(ISD::STORE);
708 setTargetDAGCombine(ISD::FP_TO_SINT);
709 setTargetDAGCombine(ISD::FP_TO_UINT);
710 setTargetDAGCombine(ISD::FDIV);
711 setTargetDAGCombine(ISD::LOAD);
712
713 // It is legal to extload from v4i8 to v4i16 or v4i32.
714 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
715 MVT::v2i32}) {
716 for (MVT VT : MVT::integer_vector_valuetypes()) {
717 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
718 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
719 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
720 }
721 }
722 }
723
724 if (Subtarget->isFPOnlySP()) {
725 // When targeting a floating-point unit with only single-precision
726 // operations, f64 is legal for the few double-precision instructions which
727 // are present. However, no double-precision operations other than moves,
728 // loads and stores are provided by the hardware.
729 setOperationAction(ISD::FADD, MVT::f64, Expand);
730 setOperationAction(ISD::FSUB, MVT::f64, Expand);
731 setOperationAction(ISD::FMUL, MVT::f64, Expand);
732 setOperationAction(ISD::FMA, MVT::f64, Expand);
733 setOperationAction(ISD::FDIV, MVT::f64, Expand);
734 setOperationAction(ISD::FREM, MVT::f64, Expand);
735 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
736 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
737 setOperationAction(ISD::FNEG, MVT::f64, Expand);
738 setOperationAction(ISD::FABS, MVT::f64, Expand);
739 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
740 setOperationAction(ISD::FSIN, MVT::f64, Expand);
741 setOperationAction(ISD::FCOS, MVT::f64, Expand);
742 setOperationAction(ISD::FPOW, MVT::f64, Expand);
743 setOperationAction(ISD::FLOG, MVT::f64, Expand);
744 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
745 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
746 setOperationAction(ISD::FEXP, MVT::f64, Expand);
747 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
748 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
749 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
750 setOperationAction(ISD::FRINT, MVT::f64, Expand);
751 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
752 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
753 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
754 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
755 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
756 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
757 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
758 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
759 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
760 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
761 }
762
763 computeRegisterProperties(Subtarget->getRegisterInfo());
764
765 // ARM does not have floating-point extending loads.
766 for (MVT VT : MVT::fp_valuetypes()) {
767 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
768 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
769 }
770
771 // ... or truncating stores
772 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
773 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
774 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
775
776 // ARM does not have i1 sign extending load.
777 for (MVT VT : MVT::integer_valuetypes())
778 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
779
780 // ARM supports all 4 flavors of integer indexed load / store.
781 if (!Subtarget->isThumb1Only()) {
782 for (unsigned im = (unsigned)ISD::PRE_INC;
783 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
784 setIndexedLoadAction(im, MVT::i1, Legal);
785 setIndexedLoadAction(im, MVT::i8, Legal);
786 setIndexedLoadAction(im, MVT::i16, Legal);
787 setIndexedLoadAction(im, MVT::i32, Legal);
788 setIndexedStoreAction(im, MVT::i1, Legal);
789 setIndexedStoreAction(im, MVT::i8, Legal);
790 setIndexedStoreAction(im, MVT::i16, Legal);
791 setIndexedStoreAction(im, MVT::i32, Legal);
792 }
793 } else {
794 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
795 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
796 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
797 }
798
799 setOperationAction(ISD::SADDO, MVT::i32, Custom);
800 setOperationAction(ISD::UADDO, MVT::i32, Custom);
801 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
802 setOperationAction(ISD::USUBO, MVT::i32, Custom);
803
804 // i64 operation support.
805 setOperationAction(ISD::MUL, MVT::i64, Expand);
806 setOperationAction(ISD::MULHU, MVT::i32, Expand);
807 if (Subtarget->isThumb1Only()) {
808 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
809 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
810 }
811 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
812 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
813 setOperationAction(ISD::MULHS, MVT::i32, Expand);
814
815 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
816 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
817 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
818 setOperationAction(ISD::SRL, MVT::i64, Custom);
819 setOperationAction(ISD::SRA, MVT::i64, Custom);
820 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
821
822 setOperationAction(ISD::ADDC, MVT::i32, Custom);
823 setOperationAction(ISD::ADDE, MVT::i32, Custom);
824 setOperationAction(ISD::SUBC, MVT::i32, Custom);
825 setOperationAction(ISD::SUBE, MVT::i32, Custom);
826
827 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
828 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
829
830 // ARM does not have ROTL.
831 setOperationAction(ISD::ROTL, MVT::i32, Expand);
832 for (MVT VT : MVT::vector_valuetypes()) {
833 setOperationAction(ISD::ROTL, VT, Expand);
834 setOperationAction(ISD::ROTR, VT, Expand);
835 }
836 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
837 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
838 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
839 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
840
841 // @llvm.readcyclecounter requires the Performance Monitors extension.
842 // Default to the 0 expansion on unsupported platforms.
843 // FIXME: Technically there are older ARM CPUs that have
844 // implementation-specific ways of obtaining this information.
845 if (Subtarget->hasPerfMon())
846 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
847
848 // Only ARMv6 has BSWAP.
849 if (!Subtarget->hasV6Ops())
850 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
851
852 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
853 : Subtarget->hasDivideInARMMode();
854 if (!hasDivide) {
855 // These are expanded into libcalls if the CPU doesn't have a HW divider.
856 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
857 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
858 }
859
860 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
861 setOperationAction(ISD::SDIV, MVT::i32, Custom);
862 setOperationAction(ISD::UDIV, MVT::i32, Custom);
863
864 setOperationAction(ISD::SDIV, MVT::i64, Custom);
865 setOperationAction(ISD::UDIV, MVT::i64, Custom);
866 }
867
868 setOperationAction(ISD::SREM, MVT::i32, Expand);
869 setOperationAction(ISD::UREM, MVT::i32, Expand);
870
871 // Register based DivRem for AEABI (RTABI 4.2)
872 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
873 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
874 Subtarget->isTargetWindows()) {
875 setOperationAction(ISD::SREM, MVT::i64, Custom);
876 setOperationAction(ISD::UREM, MVT::i64, Custom);
877 HasStandaloneRem = false;
878
879 if (Subtarget->isTargetWindows()) {
880 const struct {
881 const RTLIB::Libcall Op;
882 const char * const Name;
883 const CallingConv::ID CC;
884 } LibraryCalls[] = {
885 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
886 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
887 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
888 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
889
890 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
891 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
892 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
893 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
894 };
895
896 for (const auto &LC : LibraryCalls) {
897 setLibcallName(LC.Op, LC.Name);
898 setLibcallCallingConv(LC.Op, LC.CC);
899 }
900 } else {
901 const struct {
902 const RTLIB::Libcall Op;
903 const char * const Name;
904 const CallingConv::ID CC;
905 } LibraryCalls[] = {
906 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
907 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
908 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
909 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
910
911 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
912 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
913 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
914 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
915 };
916
917 for (const auto &LC : LibraryCalls) {
918 setLibcallName(LC.Op, LC.Name);
919 setLibcallCallingConv(LC.Op, LC.CC);
920 }
921 }
922
923 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
924 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
925 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
926 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
927 } else {
928 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
929 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
930 }
931
932 if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
933 for (auto &VT : {MVT::f32, MVT::f64})
934 setOperationAction(ISD::FPOWI, VT, Custom);
935
936 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
937 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
938 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
939 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
940
941 setOperationAction(ISD::TRAP, MVT::Other, Legal);
942
943 // Use the default implementation.
944 setOperationAction(ISD::VASTART, MVT::Other, Custom);
945 setOperationAction(ISD::VAARG, MVT::Other, Expand);
946 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
947 setOperationAction(ISD::VAEND, MVT::Other, Expand);
948 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
949 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
950
951 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
952 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
953 else
954 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
955
956 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
957 // the default expansion.
958 InsertFencesForAtomic = false;
959 if (Subtarget->hasAnyDataBarrier() &&
960 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
961 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
962 // to ldrex/strex loops already.
963 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
964 if (!Subtarget->isThumb() || !Subtarget->isMClass())
965 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
966
967 // On v8, we have particularly efficient implementations of atomic fences
968 // if they can be combined with nearby atomic loads and stores.
969 if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
970 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
971 InsertFencesForAtomic = true;
972 }
973 } else {
974 // If there's anything we can use as a barrier, go through custom lowering
975 // for ATOMIC_FENCE.
976 // If the target has DMB in Thumb, fences can be inserted.
977 if (Subtarget->hasDataBarrier())
978 InsertFencesForAtomic = true;
979
980 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
981 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
982
983 // Set them all for expansion, which will force libcalls.
984 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
985 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
986 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
987 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
988 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
989 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
990 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
991 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
992 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
993 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
994 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
995 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
996 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
997 // Unordered/Monotonic case.
998 if (!InsertFencesForAtomic) {
999 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1000 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1001 }
1002 }
1003
1004 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1005
1006 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1007 if (!Subtarget->hasV6Ops()) {
1008 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1009 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1010 }
1011 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1012
1013 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1014 !Subtarget->isThumb1Only()) {
1015 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1016 // iff target supports vfp2.
1017 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1018 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1019 }
1020
1021 // We want to custom lower some of our intrinsics.
1022 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1023 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1024 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1025 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1026 if (Subtarget->useSjLjEH())
1027 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1028
1029 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1030 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1031 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1032 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1033 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1034 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1035 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1036 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1037 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1038
1039 // Thumb-1 cannot currently select ARMISD::SUBE.
1040 if (!Subtarget->isThumb1Only())
1041 setOperationAction(ISD::SETCCE, MVT::i32, Custom);
1042
1043 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
1044 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1045 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1046 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1047 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1048
1049 // We don't support sin/cos/fmod/copysign/pow
1050 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1051 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1052 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1053 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1054 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1055 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1056 setOperationAction(ISD::FREM, MVT::f64, Expand);
1057 setOperationAction(ISD::FREM, MVT::f32, Expand);
1058 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1059 !Subtarget->isThumb1Only()) {
1060 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1061 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1062 }
1063 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1064 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1065
1066 if (!Subtarget->hasVFP4()) {
1067 setOperationAction(ISD::FMA, MVT::f64, Expand);
1068 setOperationAction(ISD::FMA, MVT::f32, Expand);
1069 }
1070
1071 // Various VFP goodness
1072 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1073 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1074 if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1075 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1076 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1077 }
1078
1079 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1080 if (!Subtarget->hasFP16()) {
1081 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1082 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1083 }
1084 }
1085
1086 // Combine sin / cos into one node or libcall if possible.
1087 if (Subtarget->hasSinCos()) {
1088 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1089 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1090 if (Subtarget->isTargetWatchABI()) {
1091 setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
1092 setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
1093 }
1094 if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1095 // For iOS, we don't want the normal expansion of a libcall to
1096 // sincos. We want to issue a libcall to __sincos_stret.
1097 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1098 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1099 }
1100 }
1101
1102 // FP-ARMv8 implements a lot of rounding-like FP operations.
1103 if (Subtarget->hasFPARMv8()) {
1104 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1105 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1106 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1107 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1108 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1109 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1110 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1111 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1112 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1113 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1114 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1115 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1116
1117 if (!Subtarget->isFPOnlySP()) {
1118 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1119 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1120 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1121 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1122 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1123 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1124 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1125 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1126 }
1127 }
1128
1129 if (Subtarget->hasNEON()) {
1130 // vmin and vmax aren't available in a scalar form, so we use
1131 // a NEON instruction with an undef lane instead.
1132 setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
1133 setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
1134 setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
1135 setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
1136 setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
1137 setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
1138 }
1139
1140 // We have target-specific dag combine patterns for the following nodes:
1141 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1142 setTargetDAGCombine(ISD::ADD);
1143 setTargetDAGCombine(ISD::SUB);
1144 setTargetDAGCombine(ISD::MUL);
1145 setTargetDAGCombine(ISD::AND);
1146 setTargetDAGCombine(ISD::OR);
1147 setTargetDAGCombine(ISD::XOR);
1148
1149 if (Subtarget->hasV6Ops())
1150 setTargetDAGCombine(ISD::SRL);
1151
1152 setStackPointerRegisterToSaveRestore(ARM::SP);
1153
1154 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1155 !Subtarget->hasVFP2())
1156 setSchedulingPreference(Sched::RegPressure);
1157 else
1158 setSchedulingPreference(Sched::Hybrid);
1159
1160 //// temporary - rewrite interface to use type
1161 MaxStoresPerMemset = 8;
1162 MaxStoresPerMemsetOptSize = 4;
1163 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1164 MaxStoresPerMemcpyOptSize = 2;
1165 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1166 MaxStoresPerMemmoveOptSize = 2;
1167
1168 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1169 // are at least 4 bytes aligned.
1170 setMinStackArgumentAlignment(4);
1171
1172 // Prefer likely predicted branches to selects on out-of-order cores.
1173 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1174
1175 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1176}
1177
1178bool ARMTargetLowering::useSoftFloat() const {
1179 return Subtarget->useSoftFloat();
1180}
1181
1182// FIXME: It might make sense to define the representative register class as the
1183// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1184 // a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1185// SPR's representative would be DPR_VFP2. This should work well if register
1186// pressure tracking were modified such that a register use would increment the
1187 // pressure of the register class's representative and all of its super
1188// classes' representatives transitively. We have not implemented this because
1189// of the difficulty prior to coalescing of modeling operand register classes
1190// due to the common occurrence of cross class copies and subregister insertions
1191// and extractions.
1192std::pair<const TargetRegisterClass *, uint8_t>
1193ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1194 MVT VT) const {
1195 const TargetRegisterClass *RRC = nullptr;
1196 uint8_t Cost = 1;
1197 switch (VT.SimpleTy) {
1198 default:
1199 return TargetLowering::findRepresentativeClass(TRI, VT);
1200 // Use DPR as representative register class for all floating point
1201 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1202 // the cost is 1 for both f32 and f64.
1203 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1204 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1205 RRC = &ARM::DPRRegClass;
1206 // When NEON is used for SP, only half of the register file is available
1207 // because operations that define both SP and DP results will be constrained
1208 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1209 // coalescing by double-counting the SP regs. See the FIXME above.
1210 if (Subtarget->useNEONForSinglePrecisionFP())
1211 Cost = 2;
1212 break;
1213 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1214 case MVT::v4f32: case MVT::v2f64:
1215 RRC = &ARM::DPRRegClass;
1216 Cost = 2;
1217 break;
1218 case MVT::v4i64:
1219 RRC = &ARM::DPRRegClass;
1220 Cost = 4;
1221 break;
1222 case MVT::v8i64:
1223 RRC = &ARM::DPRRegClass;
1224 Cost = 8;
1225 break;
1226 }
1227 return std::make_pair(RRC, Cost);
1228}
1229
1230const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1231 switch ((ARMISD::NodeType)Opcode) {
1232 case ARMISD::FIRST_NUMBER: break;
1233 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1234 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1235 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1236 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1237 case ARMISD::CALL: return "ARMISD::CALL";
1238 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1239 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1240 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1241 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1242 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1243 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1244 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1245 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1246 case ARMISD::CMP: return "ARMISD::CMP";
1247 case ARMISD::CMN: return "ARMISD::CMN";
1248 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1249 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1250 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1251 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1252 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1253
1254 case ARMISD::CMOV: return "ARMISD::CMOV";
1255
1256 case ARMISD::SSAT: return "ARMISD::SSAT";
1257
1258 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1259 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1260 case ARMISD::RRX: return "ARMISD::RRX";
1261
1262 case ARMISD::ADDC: return "ARMISD::ADDC";
1263 case ARMISD::ADDE: return "ARMISD::ADDE";
1264 case ARMISD::SUBC: return "ARMISD::SUBC";
1265 case ARMISD::SUBE: return "ARMISD::SUBE";
1266
1267 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1268 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1269
1270 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1271 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1272 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1273
1274 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1275
1276 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1277
1278 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1279
1280 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1281
1282 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1283
1284 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1285 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1286
1287 case ARMISD::VCEQ: return "ARMISD::VCEQ";
1288 case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1289 case ARMISD::VCGE: return "ARMISD::VCGE";
1290 case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1291 case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1292 case ARMISD::VCGEU: return "ARMISD::VCGEU";
1293 case ARMISD::VCGT: return "ARMISD::VCGT";
1294 case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1295 case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1296 case ARMISD::VCGTU: return "ARMISD::VCGTU";
1297 case ARMISD::VTST: return "ARMISD::VTST";
1298
1299 case ARMISD::VSHL: return "ARMISD::VSHL";
1300 case ARMISD::VSHRs: return "ARMISD::VSHRs";
1301 case ARMISD::VSHRu: return "ARMISD::VSHRu";
1302 case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1303 case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1304 case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1305 case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1306 case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1307 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1308 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1309 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1310 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1311 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1312 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1313 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1314 case ARMISD::VSLI: return "ARMISD::VSLI";
1315 case ARMISD::VSRI: return "ARMISD::VSRI";
1316 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1317 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1318 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1319 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1320 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1321 case ARMISD::VDUP: return "ARMISD::VDUP";
1322 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1323 case ARMISD::VEXT: return "ARMISD::VEXT";
1324 case ARMISD::VREV64: return "ARMISD::VREV64";
1325 case ARMISD::VREV32: return "ARMISD::VREV32";
1326 case ARMISD::VREV16: return "ARMISD::VREV16";
1327 case ARMISD::VZIP: return "ARMISD::VZIP";
1328 case ARMISD::VUZP: return "ARMISD::VUZP";
1329 case ARMISD::VTRN: return "ARMISD::VTRN";
1330 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1331 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1332 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1333 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1334 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1335 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1336 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1337 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1338 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1339 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1340 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1341 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1342 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1343 case ARMISD::SMLALD: return "ARMISD::SMLALD";
1344 case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1345 case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1346 case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1347 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1348 case ARMISD::BFI: return "ARMISD::BFI";
1349 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1350 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1351 case ARMISD::VBSL: return "ARMISD::VBSL";
1352 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1353 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1354 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1355 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1356 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1357 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1358 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1359 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1360 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1361 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1362 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1363 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1364 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1365 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1366 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1367 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1368 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1369 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1370 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1371 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1372 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1373 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1374 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1375 }
1376 return nullptr;
1377}
1378
1379EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1380 EVT VT) const {
1381 if (!VT.isVector())
1382 return getPointerTy(DL);
1383 return VT.changeVectorElementTypeToInteger();
1384}
1385
1386/// getRegClassFor - Return the register class that should be used for the
1387/// specified value type.
1388const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1389 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1390 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1391 // load / store 4 to 8 consecutive D registers.
1392 if (Subtarget->hasNEON()) {
1393 if (VT == MVT::v4i64)
1394 return &ARM::QQPRRegClass;
1395 if (VT == MVT::v8i64)
1396 return &ARM::QQQQPRRegClass;
1397 }
1398 return TargetLowering::getRegClassFor(VT);
1399}
1400
1401// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1402// source/dest is aligned and the copy size is large enough. We therefore want
1403// to align such objects passed to memory intrinsics.
1404bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1405 unsigned &PrefAlign) const {
1406 if (!isa<MemIntrinsic>(CI))
1407 return false;
1408 MinSize = 8;
1409 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1410 // cycle faster than 4-byte aligned LDM.
1411 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1412 return true;
1413}
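// Hedged sketch of what this hook reports (derived from the code above, not a
// separate implementation): on a core with v6 ops that is not M-class, the
// hook returns MinSize = 8 and PrefAlign = 8, so sufficiently large objects
// passed to memcpy/memmove/memset may be realigned to 8 bytes and copied with
// 8-byte aligned LDM/STM; on other cores PrefAlign is 4.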
1414
1415// Create a fast isel object.
1416FastISel *
1417ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1418 const TargetLibraryInfo *libInfo) const {
1419 return ARM::createFastISel(funcInfo, libInfo);
1420}
1421
1422Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1423 unsigned NumVals = N->getNumValues();
1424 if (!NumVals)
1425 return Sched::RegPressure;
1426
1427 for (unsigned i = 0; i != NumVals; ++i) {
1428 EVT VT = N->getValueType(i);
1429 if (VT == MVT::Glue || VT == MVT::Other)
1430 continue;
1431 if (VT.isFloatingPoint() || VT.isVector())
1432 return Sched::ILP;
1433 }
1434
1435 if (!N->isMachineOpcode())
1436 return Sched::RegPressure;
1437
1438 // Loads are scheduled for latency even if the instruction itinerary
1439 // is not available.
1440 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1441 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1442
1443 if (MCID.getNumDefs() == 0)
1444 return Sched::RegPressure;
1445 if (!Itins->isEmpty() &&
1446 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1447 return Sched::ILP;
1448
1449 return Sched::RegPressure;
1450}
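// Illustrative consequences of the rules above (not part of the original
// source): any node producing a floating-point or vector result (e.g. a NEON
// multiply) is scheduled for ILP; a machine node with no defs, or one whose
// first result is available within two cycles according to the itinerary,
// falls back to the default register-pressure heuristic.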
1451
1452//===----------------------------------------------------------------------===//
1453// Lowering Code
1454//===----------------------------------------------------------------------===//
1455
1456static bool isSRL16(const SDValue &Op) {
1457 if (Op.getOpcode() != ISD::SRL)
1458 return false;
1459 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1460 return Const->getZExtValue() == 16;
1461 return false;
1462}
1463
1464static bool isSRA16(const SDValue &Op) {
1465 if (Op.getOpcode() != ISD::SRA)
1466 return false;
1467 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1468 return Const->getZExtValue() == 16;
1469 return false;
1470}
1471
1472static bool isSHL16(const SDValue &Op) {
1473 if (Op.getOpcode() != ISD::SHL)
1474 return false;
1475 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1476 return Const->getZExtValue() == 16;
1477 return false;
1478}
1479
1480// Check for a signed 16-bit value. We special-case SRA because it simplifies
1481// things when also looking for SRAs that aren't sign-extending a
1482// smaller value. Without the check, we'd need to take extra care with
1483// checking order for some operations.
1484static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1485 if (isSRA16(Op))
1486 return isSHL16(Op.getOperand(0));
1487 return DAG.ComputeNumSignBits(Op) == 17;
1488}
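// Worked example (illustrative, not from the original source): a value formed
// as (x << 16) >> 16 with an arithmetic right shift matches the
// isSRA16/isSHL16 pair above, so isS16 returns true without consulting
// ComputeNumSignBits. For any other node, ComputeNumSignBits(Op) == 17 means
// the top 17 bits of the i32 are known copies of the sign bit, i.e. the value
// fits in a signed 16-bit range.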
1489
1490/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1491static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1492 switch (CC) {
1493 default: llvm_unreachable("Unknown condition code!");
1494 case ISD::SETNE: return ARMCC::NE;
1495 case ISD::SETEQ: return ARMCC::EQ;
1496 case ISD::SETGT: return ARMCC::GT;
1497 case ISD::SETGE: return ARMCC::GE;
1498 case ISD::SETLT: return ARMCC::LT;
1499 case ISD::SETLE: return ARMCC::LE;
1500 case ISD::SETUGT: return ARMCC::HI;
1501 case ISD::SETUGE: return ARMCC::HS;
1502 case ISD::SETULT: return ARMCC::LO;
1503 case ISD::SETULE: return ARMCC::LS;
1504 }
1505}
1506
1507/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1508static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1509 ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1510 CondCode2 = ARMCC::AL;
1511 InvalidOnQNaN = true;
1512 switch (CC) {
1513 default: llvm_unreachable("Unknown FP condition!");
1514 case ISD::SETEQ:
1515 case ISD::SETOEQ:
1516 CondCode = ARMCC::EQ;
1517 InvalidOnQNaN = false;
1518 break;
1519 case ISD::SETGT:
1520 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1521 case ISD::SETGE:
1522 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1523 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1524 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1525 case ISD::SETONE:
1526 CondCode = ARMCC::MI;
1527 CondCode2 = ARMCC::GT;
1528 InvalidOnQNaN = false;
1529 break;
1530 case ISD::SETO: CondCode = ARMCC::VC; break;
1531 case ISD::SETUO: CondCode = ARMCC::VS; break;
1532 case ISD::SETUEQ:
1533 CondCode = ARMCC::EQ;
1534 CondCode2 = ARMCC::VS;
1535 InvalidOnQNaN = false;
1536 break;
1537 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1538 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1539 case ISD::SETLT:
1540 case ISD::SETULT: CondCode = ARMCC::LT; break;
1541 case ISD::SETLE:
1542 case ISD::SETULE: CondCode = ARMCC::LE; break;
1543 case ISD::SETNE:
1544 case ISD::SETUNE:
1545 CondCode = ARMCC::NE;
1546 InvalidOnQNaN = false;
1547 break;
1548 }
1549}
1550
1551//===----------------------------------------------------------------------===//
1552// Calling Convention Implementation
1553//===----------------------------------------------------------------------===//
1554
1555#include "ARMGenCallingConv.inc"
1556
1557/// getEffectiveCallingConv - Get the effective calling convention, taking into
1558/// account the presence of floating-point hardware and calling convention
1559/// limitations, such as support for variadic functions.
1560CallingConv::ID
1561ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1562 bool isVarArg) const {
1563 switch (CC) {
1564 default:
1565 llvm_unreachable("Unsupported calling convention");
1566 case CallingConv::ARM_AAPCS:
1567 case CallingConv::ARM_APCS:
1568 case CallingConv::GHC:
1569 return CC;
1570 case CallingConv::PreserveMost:
1571 return CallingConv::PreserveMost;
1572 case CallingConv::ARM_AAPCS_VFP:
1573 case CallingConv::Swift:
1574 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1575 case CallingConv::C:
1576 if (!Subtarget->isAAPCS_ABI())
1577 return CallingConv::ARM_APCS;
1578 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1579 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1580 !isVarArg)
1581 return CallingConv::ARM_AAPCS_VFP;
1582 else
1583 return CallingConv::ARM_AAPCS;
1584 case CallingConv::Fast:
1585 case CallingConv::CXX_FAST_TLS:
1586 if (!Subtarget->isAAPCS_ABI()) {
1587 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1588 return CallingConv::Fast;
1589 return CallingConv::ARM_APCS;
1590 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1591 return CallingConv::ARM_AAPCS_VFP;
1592 else
1593 return CallingConv::ARM_AAPCS;
1594 }
1595}
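// Example mappings implied by the switch above (illustrative): on an AAPCS
// target with VFP2, a hard-float ABI, not Thumb1-only and a non-variadic
// callee, CallingConv::C becomes ARM_AAPCS_VFP so FP arguments can travel in
// VFP registers; with a soft-float ABI or a variadic callee it falls back to
// ARM_AAPCS, and on a pre-AAPCS (APCS) target it becomes ARM_APCS.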
1596
1597CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1598 bool isVarArg) const {
1599 return CCAssignFnForNode(CC, false, isVarArg);
1600}
1601
1602CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1603 bool isVarArg) const {
1604 return CCAssignFnForNode(CC, true, isVarArg);
1605}
1606
1607/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1608/// CallingConvention.
1609CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1610 bool Return,
1611 bool isVarArg) const {
1612 switch (getEffectiveCallingConv(CC, isVarArg)) {
1613 default:
1614 llvm_unreachable("Unsupported calling convention");
1615 case CallingConv::ARM_APCS:
1616 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1617 case CallingConv::ARM_AAPCS:
1618 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1619 case CallingConv::ARM_AAPCS_VFP:
1620 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1621 case CallingConv::Fast:
1622 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1623 case CallingConv::GHC:
1624 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1625 case CallingConv::PreserveMost:
1626 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1627 }
1628}
1629
1630/// LowerCallResult - Lower the result values of a call into the
1631/// appropriate copies out of appropriate physical registers.
1632SDValue ARMTargetLowering::LowerCallResult(
1633 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1634 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1635 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1636 SDValue ThisVal) const {
1637
1638 // Assign locations to each value returned by this call.
1639 SmallVector<CCValAssign, 16> RVLocs;
1640 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1641 *DAG.getContext());
1642 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1643
1644 // Copy all of the result registers out of their specified physreg.
1645 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1646 CCValAssign VA = RVLocs[i];
1647
1648 // Pass 'this' value directly from the argument to return value, to avoid
1649 // reg unit interference
1650 if (i == 0 && isThisReturn) {
1651 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1652        "unexpected return calling convention register assignment");
1653 InVals.push_back(ThisVal);
1654 continue;
1655 }
1656
1657 SDValue Val;
1658 if (VA.needsCustom()) {
1659 // Handle f64 or half of a v2f64.
1660 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1661 InFlag);
1662 Chain = Lo.getValue(1);
1663 InFlag = Lo.getValue(2);
1664 VA = RVLocs[++i]; // skip ahead to next loc
1665 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1666 InFlag);
1667 Chain = Hi.getValue(1);
1668 InFlag = Hi.getValue(2);
1669 if (!Subtarget->isLittle())
1670 std::swap (Lo, Hi);
1671 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1672
1673 if (VA.getLocVT() == MVT::v2f64) {
1674 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1675 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1676 DAG.getConstant(0, dl, MVT::i32));
1677
1678 VA = RVLocs[++i]; // skip ahead to next loc
1679 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1680 Chain = Lo.getValue(1);
1681 InFlag = Lo.getValue(2);
1682 VA = RVLocs[++i]; // skip ahead to next loc
1683 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1684 Chain = Hi.getValue(1);
1685 InFlag = Hi.getValue(2);
1686 if (!Subtarget->isLittle())
1687 std::swap (Lo, Hi);
1688 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1689 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1690 DAG.getConstant(1, dl, MVT::i32));
1691 }
1692 } else {
1693 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1694 InFlag);
1695 Chain = Val.getValue(1);
1696 InFlag = Val.getValue(2);
1697 }
1698
1699 switch (VA.getLocInfo()) {
1700 default: llvm_unreachable("Unknown loc info!");
1701 case CCValAssign::Full: break;
1702 case CCValAssign::BCvt:
1703 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1704 break;
1705 }
1706
1707 InVals.push_back(Val);
1708 }
1709
1710 return Chain;
1711}
1712
1713/// LowerMemOpCallTo - Store the argument to the stack.
1714SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1715 SDValue Arg, const SDLoc &dl,
1716 SelectionDAG &DAG,
1717 const CCValAssign &VA,
1718 ISD::ArgFlagsTy Flags) const {
1719 unsigned LocMemOffset = VA.getLocMemOffset();
1720 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1721 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1722 StackPtr, PtrOff);
1723 return DAG.getStore(
1724 Chain, dl, Arg, PtrOff,
1725 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1726}
1727
1728void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1729 SDValue Chain, SDValue &Arg,
1730 RegsToPassVector &RegsToPass,
1731 CCValAssign &VA, CCValAssign &NextVA,
1732 SDValue &StackPtr,
1733 SmallVectorImpl<SDValue> &MemOpChains,
1734 ISD::ArgFlagsTy Flags) const {
1735
1736 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1737 DAG.getVTList(MVT::i32, MVT::i32), Arg);
1738 unsigned id = Subtarget->isLittle() ? 0 : 1;
1739 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1740
1741 if (NextVA.isRegLoc())
1742 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1743 else {
1744 assert(NextVA.isMemLoc());
1745 if (!StackPtr.getNode())
1746 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1747 getPointerTy(DAG.getDataLayout()));
1748
1749 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1750 dl, DAG, NextVA,
1751 Flags));
1752 }
1753}
1754
1755/// LowerCall - Lower a call into a callseq_start <-
1756/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1757/// nodes.
1758SDValue
1759ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1760 SmallVectorImpl<SDValue> &InVals) const {
1761 SelectionDAG &DAG = CLI.DAG;
1762 SDLoc &dl = CLI.DL;
1763 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1764 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1765 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1766 SDValue Chain = CLI.Chain;
1767 SDValue Callee = CLI.Callee;
1768 bool &isTailCall = CLI.IsTailCall;
1769 CallingConv::ID CallConv = CLI.CallConv;
1770 bool doesNotRet = CLI.DoesNotReturn;
1771 bool isVarArg = CLI.IsVarArg;
1772
1773 MachineFunction &MF = DAG.getMachineFunction();
1774 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1775 bool isThisReturn = false;
1776 bool isSibCall = false;
1777 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1778
1779 // Disable tail calls if they're not supported.
1780 if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1781 isTailCall = false;
1782
1783 if (isTailCall) {
1784 // Check if it's really possible to do a tail call.
1785 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1786 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1787 Outs, OutVals, Ins, DAG);
1788 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1789 report_fatal_error("failed to perform tail call elimination on a call "
1790 "site marked musttail");
1791 // We don't support GuaranteedTailCallOpt for ARM, only automatically
1792 // detected sibcalls.
1793 if (isTailCall) {
1794 ++NumTailCalls;
1795 isSibCall = true;
1796 }
1797 }
1798
1799 // Analyze operands of the call, assigning locations to each operand.
1800 SmallVector<CCValAssign, 16> ArgLocs;
1801 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1802 *DAG.getContext());
1803 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1804
1805 // Get a count of how many bytes are to be pushed on the stack.
1806 unsigned NumBytes = CCInfo.getNextStackOffset();
1807
1808 // For tail calls, memory operands are available in our caller's stack.
1809 if (isSibCall)
1810 NumBytes = 0;
1811
1812 // Adjust the stack pointer for the new arguments...
1813 // These operations are automatically eliminated by the prolog/epilog pass
1814 if (!isSibCall)
1815 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1816
1817 SDValue StackPtr =
1818 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1819
1820 RegsToPassVector RegsToPass;
1821 SmallVector<SDValue, 8> MemOpChains;
1822
1823 // Walk the register/memloc assignments, inserting copies/loads. In the case
1824 // of tail call optimization, arguments are handled later.
1825 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1826 i != e;
1827 ++i, ++realArgIdx) {
1828 CCValAssign &VA = ArgLocs[i];
1829 SDValue Arg = OutVals[realArgIdx];
1830 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1831 bool isByVal = Flags.isByVal();
1832
1833 // Promote the value if needed.
1834 switch (VA.getLocInfo()) {
1835 default: llvm_unreachable("Unknown loc info!");
1836 case CCValAssign::Full: break;
1837 case CCValAssign::SExt:
1838 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1839 break;
1840 case CCValAssign::ZExt:
1841 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1842 break;
1843 case CCValAssign::AExt:
1844 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1845 break;
1846 case CCValAssign::BCvt:
1847 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1848 break;
1849 }
1850
1851 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1852 if (VA.needsCustom()) {
1853 if (VA.getLocVT() == MVT::v2f64) {
1854 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1855 DAG.getConstant(0, dl, MVT::i32));
1856 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1857 DAG.getConstant(1, dl, MVT::i32));
1858
1859 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1860 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1861
1862 VA = ArgLocs[++i]; // skip ahead to next loc
1863 if (VA.isRegLoc()) {
1864 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1865 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1866 } else {
1867 assert(VA.isMemLoc());
1868
1869 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1870 dl, DAG, VA, Flags));
1871 }
1872 } else {
1873 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1874 StackPtr, MemOpChains, Flags);
1875 }
1876 } else if (VA.isRegLoc()) {
1877 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1878 Outs[0].VT == MVT::i32) {
1879 assert(VA.getLocVT() == MVT::i32 &&
1880        "unexpected calling convention register assignment");
1881 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1882        "unexpected use of 'returned'");
1883 isThisReturn = true;
1884 }
1885 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1886 } else if (isByVal) {
1887 assert(VA.isMemLoc());
1888 unsigned offset = 0;
1889
1890 // True if this byval aggregate will be split between registers
1891 // and memory.
1892 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1893 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1894
1895 if (CurByValIdx < ByValArgsCount) {
1896
1897 unsigned RegBegin, RegEnd;
1898 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1899
1900 EVT PtrVT =
1901 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1902 unsigned int i, j;
1903 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1904 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1905 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1906 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1907 MachinePointerInfo(),
1908 DAG.InferPtrAlignment(AddArg));
1909 MemOpChains.push_back(Load.getValue(1));
1910 RegsToPass.push_back(std::make_pair(j, Load));
1911 }
1912
1913 // If the parameter size extends beyond the register area, the "offset"
1914 // value helps us calculate the stack slot for the remaining part properly.
1915 offset = RegEnd - RegBegin;
1916
1917 CCInfo.nextInRegsParam();
1918 }
1919
1920 if (Flags.getByValSize() > 4*offset) {
1921 auto PtrVT = getPointerTy(DAG.getDataLayout());
1922 unsigned LocMemOffset = VA.getLocMemOffset();
1923 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1924 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1925 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1926 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1927 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1928 MVT::i32);
1929 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1930 MVT::i32);
1931
1932 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1933 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1934 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1935 Ops));
1936 }
1937 } else if (!isSibCall) {
1938 assert(VA.isMemLoc());
1939
1940 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1941 dl, DAG, VA, Flags));
1942 }
1943 }
1944
1945 if (!MemOpChains.empty())
1946 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1947
1948 // Build a sequence of copy-to-reg nodes chained together with token chain
1949 // and flag operands which copy the outgoing args into the appropriate regs.
1950 SDValue InFlag;
1951 // Tail call byval lowering might overwrite argument registers so in case of
1952 // tail call optimization the copies to registers are lowered later.
1953 if (!isTailCall)
1954 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1955 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1956 RegsToPass[i].second, InFlag);
1957 InFlag = Chain.getValue(1);
1958 }
1959
1960 // For tail calls lower the arguments to the 'real' stack slot.
1961 if (isTailCall) {
1962 // Force all the incoming stack arguments to be loaded from the stack
1963 // before any new outgoing arguments are stored to the stack, because the
1964 // outgoing stack slots may alias the incoming argument stack slots, and
1965 // the alias isn't otherwise explicit. This is slightly more conservative
1966 // than necessary, because it means that each store effectively depends
1967 // on every argument instead of just those arguments it would clobber.
1968
1969 // Do not flag preceding copytoreg stuff together with the following stuff.
1970 InFlag = SDValue();
1971 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1972 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1973 RegsToPass[i].second, InFlag);
1974 InFlag = Chain.getValue(1);
1975 }
1976 InFlag = SDValue();
1977 }
1978
1979 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1980 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1981 // node so that legalize doesn't hack it.
1982 bool isDirect = false;
1983
1984 const TargetMachine &TM = getTargetMachine();
1985 const Module *Mod = MF.getFunction()->getParent();
1986 const GlobalValue *GV = nullptr;
1987 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1988 GV = G->getGlobal();
1989 bool isStub =
1990 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1991
1992 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1993 bool isLocalARMFunc = false;
1994 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1995 auto PtrVt = getPointerTy(DAG.getDataLayout());
1996
1997 if (Subtarget->genLongCalls()) {
1998 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1999        "long-calls codegen is not position independent!");
2000 // Handle a global address or an external symbol. If it's not one of
2001 // those, the target's already in a register, so we don't need to do
2002 // anything extra.
2003 if (isa<GlobalAddressSDNode>(Callee)) {
2004 // Create a constant pool entry for the callee address
2005 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2006 ARMConstantPoolValue *CPV =
2007 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2008
2009 // Get the address of the callee into a register
2010 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2011 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2012 Callee = DAG.getLoad(
2013 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2014 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2015 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2016 const char *Sym = S->getSymbol();
2017
2018 // Create a constant pool entry for the callee address
2019 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2020 ARMConstantPoolValue *CPV =
2021 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2022 ARMPCLabelIndex, 0);
2023 // Get the address of the callee into a register
2024 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2025 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2026 Callee = DAG.getLoad(
2027 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2028 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2029 }
2030 } else if (isa<GlobalAddressSDNode>(Callee)) {
2031 // If we're optimizing for minimum size and the function is called three or
2032 // more times in this block, we can improve codesize by calling indirectly
2033 // as BLXr has a 16-bit encoding.
2034 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2035 auto *BB = CLI.CS->getParent();
2036 bool PreferIndirect =
2037 Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2038 count_if(GV->users(), [&BB](const User *U) {
2039 return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2040 }) > 2;
2041
2042 if (!PreferIndirect) {
2043 isDirect = true;
2044 bool isDef = GV->isStrongDefinitionForLinker();
2045
2046 // ARM call to a local ARM function is predicable.
2047 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2048 // tBX takes a register source operand.
2049 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2050 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2051 Callee = DAG.getNode(
2052 ARMISD::WrapperPIC, dl, PtrVt,
2053 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2054 Callee = DAG.getLoad(
2055 PtrVt, dl, DAG.getEntryNode(), Callee,
2056 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2057 /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2058 MachineMemOperand::MOInvariant);
2059 } else if (Subtarget->isTargetCOFF()) {
2060 assert(Subtarget->isTargetWindows() &&
2061        "Windows is the only supported COFF target");
2062 unsigned TargetFlags = GV->hasDLLImportStorageClass()
2063 ? ARMII::MO_DLLIMPORT
2064 : ARMII::MO_NO_FLAG;
2065 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2066 TargetFlags);
2067 if (GV->hasDLLImportStorageClass())
2068 Callee =
2069 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2070 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2071 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2072 } else {
2073 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2074 }
2075 }
2076 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2077 isDirect = true;
2078 // tBX takes a register source operand.
2079 const char *Sym = S->getSymbol();
2080 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2081 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2082 ARMConstantPoolValue *CPV =
2083 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2084 ARMPCLabelIndex, 4);
2085 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2086 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2087 Callee = DAG.getLoad(
2088 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2089 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2090 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2091 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2092 } else {
2093 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2094 }
2095 }
2096
2097 // FIXME: handle tail calls differently.
2098 unsigned CallOpc;
2099 if (Subtarget->isThumb()) {
2100 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2101 CallOpc = ARMISD::CALL_NOLINK;
2102 else
2103 CallOpc = ARMISD::CALL;
2104 } else {
2105 if (!isDirect && !Subtarget->hasV5TOps())
2106 CallOpc = ARMISD::CALL_NOLINK;
2107 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2108 // Emit regular call when code size is the priority
2109 !MF.getFunction()->optForMinSize())
2110 // "mov lr, pc; b _foo" to avoid confusing the RSP
2111 CallOpc = ARMISD::CALL_NOLINK;
2112 else
2113 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2114 }
2115
2116 std::vector<SDValue> Ops;
2117 Ops.push_back(Chain);
2118 Ops.push_back(Callee);
2119
2120 // Add argument registers to the end of the list so that they are known live
2121 // into the call.
2122 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2123 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2124 RegsToPass[i].second.getValueType()));
2125
2126 // Add a register mask operand representing the call-preserved registers.
2127 if (!isTailCall) {
2128 const uint32_t *Mask;
2129 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2130 if (isThisReturn) {
2131 // For 'this' returns, use the R0-preserving mask if applicable
2132 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2133 if (!Mask) {
2134 // Set isThisReturn to false if the calling convention is not one that
2135 // allows 'returned' to be modeled in this way, so LowerCallResult does
2136 // not try to pass 'this' straight through
2137 isThisReturn = false;
2138 Mask = ARI->getCallPreservedMask(MF, CallConv);
2139 }
2140 } else
2141 Mask = ARI->getCallPreservedMask(MF, CallConv);
2142
2143 assert(Mask && "Missing call preserved mask for calling convention");
2144 Ops.push_back(DAG.getRegisterMask(Mask));
2145 }
2146
2147 if (InFlag.getNode())
2148 Ops.push_back(InFlag);
2149
2150 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2151 if (isTailCall) {
2152 MF.getFrameInfo().setHasTailCall();
2153 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2154 }
2155
2156 // Returns a chain and a flag for retval copy to use.
2157 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2158 InFlag = Chain.getValue(1);
2159
2160 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2161 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2162 if (!Ins.empty())
2163 InFlag = Chain.getValue(1);
2164
2165 // Handle result values, copying them out of physregs into vregs that we
2166 // return.
2167 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2168 InVals, isThisReturn,
2169 isThisReturn ? OutVals[0] : SDValue());
2170}
2171
2172/// HandleByVal - Every parameter *after* a byval parameter is passed
2173/// on the stack. Remember the next parameter register to allocate,
2174/// and then confiscate the rest of the parameter registers to ensure
2175/// this.
2176void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2177 unsigned Align) const {
2178 // Byval (as with any stack) slots are always at least 4 byte aligned.
2179 Align = std::max(Align, 4U);
2180
2181 unsigned Reg = State->AllocateReg(GPRArgRegs);
2182 if (!Reg)
2183 return;
2184
2185 unsigned AlignInRegs = Align / 4;
2186 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2187 for (unsigned i = 0; i < Waste; ++i)
2188 Reg = State->AllocateReg(GPRArgRegs);
2189
2190 if (!Reg)
2191 return;
2192
2193 unsigned Excess = 4 * (ARM::R4 - Reg);
2194
2195 // Special case when NSAA != SP and the parameter size is greater than the
2196 // size of all remaining GPR regs. In that case we can't split the parameter;
2197 // we must send it entirely to the stack. We also must set NCRN to R4, so all
2198 // remaining registers are wasted.
2199 const unsigned NSAAOffset = State->getNextStackOffset();
2200 if (NSAAOffset != 0 && Size > Excess) {
2201 while (State->AllocateReg(GPRArgRegs))
2202 ;
2203 return;
2204 }
2205
2206 // The first register for the byval parameter is the first register that
2207 // wasn't allocated before this method call, i.e. "Reg".
2208 // If the parameter is small enough to be saved in the range [reg, r4), then
2209 // the end (one past the last) register would be reg + param-size-in-regs;
2210 // otherwise the parameter would be split between registers and stack,
2211 // and the end register would be r4 in that case.
2212 unsigned ByValRegBegin = Reg;
2213 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2214 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2215 // Note that the first register was already allocated at the beginning of
2216 // this function; allocate the remaining registers we need.
2217 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2218 State->AllocateReg(GPRArgRegs);
2219 // A byval parameter that is split between registers and memory needs its
2220 // size truncated here.
2221 // In the case where the entire structure fits in registers, we set the
2222 // size in memory to zero.
2223 Size = std::max<int>(Size - Excess, 0);
2224}
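// Worked example (illustrative, assuming R0 was consumed by a preceding
// argument so the first free GPR is R1, no stack arguments so far, and
// consecutive GPR enum values): a 16-byte byval with 8-byte alignment gives
//   AlignInRegs   = 8 / 4 = 2
//   Waste         = (R4 - R1) % 2 = 1   -> R1 is burned as alignment padding
//   Reg           = R2, Excess = 4 * (R4 - R2) = 8 bytes fit in R2..R3
//   ByValRegBegin = R2, ByValRegEnd = min(R2 + 16/4, R4) = R4
//   Size          = max(16 - 8, 0) = 8  -> the remaining 8 bytes go on the stack.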
2225
2226/// MatchingStackOffset - Return true if the given stack call argument is
2227/// already available in the same position (relatively) of the caller's
2228/// incoming argument stack.
2229static
2230bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2231 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2232 const TargetInstrInfo *TII) {
2233 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2234 int FI = std::numeric_limits<int>::max();
2235 if (Arg.getOpcode() == ISD::CopyFromReg) {
2236 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2237 if (!TargetRegisterInfo::isVirtualRegister(VR))
2238 return false;
2239 MachineInstr *Def = MRI->getVRegDef(VR);
2240 if (!Def)
2241 return false;
2242 if (!Flags.isByVal()) {
2243 if (!TII->isLoadFromStackSlot(*Def, FI))
2244 return false;
2245 } else {
2246 return false;
2247 }
2248 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2249 if (Flags.isByVal())
2250 // ByVal argument is passed in as a pointer but it's now being
2251 // dereferenced. e.g.
2252 // define @foo(%struct.X* %A) {
2253 // tail call @bar(%struct.X* byval %A)
2254 // }
2255 return false;
2256 SDValue Ptr = Ld->getBasePtr();
2257 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2258 if (!FINode)
2259 return false;
2260 FI = FINode->getIndex();
2261 } else
2262 return false;
2263
2264 assert(FI != std::numeric_limits<int>::max());
2265 if (!MFI.isFixedObjectIndex(FI))
2266 return false;
2267 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2268}
2269
2270/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2271/// for tail call optimization. Targets which want to do tail call
2272/// optimization should implement this function.
2273bool
2274ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2275 CallingConv::ID CalleeCC,
2276 bool isVarArg,
2277 bool isCalleeStructRet,
2278 bool isCallerStructRet,
2279 const SmallVectorImpl<ISD::OutputArg> &Outs,
2280 const SmallVectorImpl<SDValue> &OutVals,
2281 const SmallVectorImpl<ISD::InputArg> &Ins,
2282 SelectionDAG& DAG) const {
2283 MachineFunction &MF = DAG.getMachineFunction();
2284 const Function *CallerF = MF.getFunction();
2285 CallingConv::ID CallerCC = CallerF->getCallingConv();
2286
2287 assert(Subtarget->supportsTailCall());
2288
2289 // Look for obvious safe cases to perform tail call optimization that do not
2290 // require ABI changes. This is what gcc calls sibcall.
2291
2292 // Exception-handling functions need a special set of instructions to indicate
2293 // a return to the hardware. Tail-calling another function would probably
2294 // break this.
2295 if (CallerF->hasFnAttribute("interrupt"))
2296 return false;
2297
2298 // Also avoid sibcall optimization if either caller or callee uses struct
2299 // return semantics.
2300 if (isCalleeStructRet || isCallerStructRet)
2301 return false;
2302
2303 // Externally-defined functions with weak linkage should not be
2304 // tail-called on ARM when the OS does not support dynamic
2305 // pre-emption of symbols, as the AAELF spec requires normal calls
2306 // to undefined weak functions to be replaced with a NOP or jump to the
2307 // next instruction. The behaviour of branch instructions in this
2308 // situation (as used for tail calls) is implementation-defined, so we
2309 // cannot rely on the linker replacing the tail call with a return.
2310 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2311 const GlobalValue *GV = G->getGlobal();
2312 const Triple &TT = getTargetMachine().getTargetTriple();
2313 if (GV->hasExternalWeakLinkage() &&
2314 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2315 return false;
2316 }
2317
2318 // Check that the call results are passed in the same way.
2319 LLVMContext &C = *DAG.getContext();
2320 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2321 CCAssignFnForReturn(CalleeCC, isVarArg),
2322 CCAssignFnForReturn(CallerCC, isVarArg)))
2323 return false;
2324 // The callee has to preserve all registers the caller needs to preserve.
2325 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2326 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2327 if (CalleeCC != CallerCC) {
2328 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2329 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2330 return false;
2331 }
2332
2333 // If the caller's vararg or byval argument has been split between registers
2334 // and stack, do not perform a tail call, since part of the argument is in the
2335 // caller's local frame.
2336 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2337 if (AFI_Caller->getArgRegsSaveSize())
2338 return false;
2339
2340 // If the callee takes no arguments then go on to check the results of the
2341 // call.
2342 if (!Outs.empty()) {
2343 // Check if stack adjustment is needed. For now, do not do this if any
2344 // argument is passed on the stack.
2345 SmallVector<CCValAssign, 16> ArgLocs;
2346 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2347 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2348 if (CCInfo.getNextStackOffset()) {
2349 // Check if the arguments are already laid out in the right way as
2350 // the caller's fixed stack objects.
2351 MachineFrameInfo &MFI = MF.getFrameInfo();
2352 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2353 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2354 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2355 i != e;
2356 ++i, ++realArgIdx) {
2357 CCValAssign &VA = ArgLocs[i];
2358 EVT RegVT = VA.getLocVT();
2359 SDValue Arg = OutVals[realArgIdx];
2360 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2361 if (VA.getLocInfo() == CCValAssign::Indirect)
2362 return false;
2363 if (VA.needsCustom()) {
2364 // f64 and vector types are split into multiple registers or
2365 // register/stack-slot combinations. The types will not match
2366 // the registers; give up on memory f64 refs until we figure
2367 // out what to do about this.
2368 if (!VA.isRegLoc())
2369 return false;
2370 if (!ArgLocs[++i].isRegLoc())
2371 return false;
2372 if (RegVT == MVT::v2f64) {
2373 if (!ArgLocs[++i].isRegLoc())
2374 return false;
2375 if (!ArgLocs[++i].isRegLoc())
2376 return false;
2377 }
2378 } else if (!VA.isRegLoc()) {
2379 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2380 MFI, MRI, TII))
2381 return false;
2382 }
2383 }
2384 }
2385
2386 const MachineRegisterInfo &MRI = MF.getRegInfo();
2387 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2388 return false;
2389 }
2390
2391 return true;
2392}
2393
2394bool
2395ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2396 MachineFunction &MF, bool isVarArg,
2397 const SmallVectorImpl<ISD::OutputArg> &Outs,
2398 LLVMContext &Context) const {
2399 SmallVector<CCValAssign, 16> RVLocs;
2400 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2401 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2402}
2403
2404static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2405 const SDLoc &DL, SelectionDAG &DAG) {
2406 const MachineFunction &MF = DAG.getMachineFunction();
2407 const Function *F = MF.getFunction();
2408
2409 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2410
2411 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2412 // version of the "preferred return address". These offsets affect the return
2413 // instruction if this is a return from PL1 without hypervisor extensions.
2414 // IRQ/FIQ: +4 "subs pc, lr, #4"
2415 // SWI: 0 "subs pc, lr, #0"
2416 // ABORT: +4 "subs pc, lr, #4"
2417 // UNDEF: +4/+2 "subs pc, lr, #0"
2418 // UNDEF varies depending on where the exception came from, ARM or Thumb
2419 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2420
2421 int64_t LROffset;
2422 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2423 IntKind == "ABORT")
2424 LROffset = 4;
2425 else if (IntKind == "SWI" || IntKind == "UNDEF")
2426 LROffset = 0;
2427 else
2428 report_fatal_error("Unsupported interrupt attribute. If present, value "
2429 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2430
2431 RetOps.insert(RetOps.begin() + 1,
2432 DAG.getConstant(LROffset, DL, MVT::i32, false));
2433
2434 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2435}
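// Example (illustrative): for a handler whose IR function carries the string
// attribute "interrupt"="IRQ", LROffset is 4 and the eventual return is
// emitted as "subs pc, lr, #4", which also restores CPSR from SPSR on
// exception return; "interrupt"="SWI" uses an LR offset of 0 instead.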
2436
2437SDValue
2438ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2439 bool isVarArg,
2440 const SmallVectorImpl<ISD::OutputArg> &Outs,
2441 const SmallVectorImpl<SDValue> &OutVals,
2442 const SDLoc &dl, SelectionDAG &DAG) const {
2443
2444 // CCValAssign - represent the assignment of the return value to a location.
2445 SmallVector<CCValAssign, 16> RVLocs;
2446
2447 // CCState - Info about the registers and stack slots.
2448 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2449 *DAG.getContext());
2450
2451 // Analyze outgoing return values.
2452 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2453
2454 SDValue Flag;
2455 SmallVector<SDValue, 4> RetOps;
2456 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2457 bool isLittleEndian = Subtarget->isLittle();
2458
2459 MachineFunction &MF = DAG.getMachineFunction();
2460 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2461 AFI->setReturnRegsCount(RVLocs.size());
2462
2463 // Copy the result values into the output registers.
2464 for (unsigned i = 0, realRVLocIdx = 0;
2465 i != RVLocs.size();
2466 ++i, ++realRVLocIdx) {
2467 CCValAssign &VA = RVLocs[i];
2468 assert(VA.isRegLoc() && "Can only return in registers!");
2469
2470 SDValue Arg = OutVals[realRVLocIdx];
2471
2472 switch (VA.getLocInfo()) {
2473 default: llvm_unreachable("Unknown loc info!");
2474 case CCValAssign::Full: break;
2475 case CCValAssign::BCvt:
2476 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2477 break;
2478 }
2479
2480 if (VA.needsCustom()) {
2481 if (VA.getLocVT() == MVT::v2f64) {
2482 // Extract the first half and return it in two registers.
2483 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2484 DAG.getConstant(0, dl, MVT::i32));
2485 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2486 DAG.getVTList(MVT::i32, MVT::i32), Half);
2487
2488 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2489 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2490 Flag);
2491 Flag = Chain.getValue(1);
2492 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2493 VA = RVLocs[++i]; // skip ahead to next loc
2494 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2495 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2496 Flag);
2497 Flag = Chain.getValue(1);
2498 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2499 VA = RVLocs[++i]; // skip ahead to next loc
2500
2501 // Extract the 2nd half and fall through to handle it as an f64 value.
2502 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2503 DAG.getConstant(1, dl, MVT::i32));
2504 }
2505 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2506 // available.
2507 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2508 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2509 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2510 fmrrd.getValue(isLittleEndian ? 0 : 1),
2511 Flag);
2512 Flag = Chain.getValue(1);
2513 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2514 VA = RVLocs[++i]; // skip ahead to next loc
2515 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2516 fmrrd.getValue(isLittleEndian ? 1 : 0),
2517 Flag);
2518 } else
2519 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2520
2521    // Guarantee that all emitted copies are glued together so that nothing
2522    // can be scheduled in between them.
2523 Flag = Chain.getValue(1);
2524 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2525 }
2526 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2527 const MCPhysReg *I =
2528 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2529 if (I) {
2530 for (; *I; ++I) {
2531 if (ARM::GPRRegClass.contains(*I))
2532 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2533 else if (ARM::DPRRegClass.contains(*I))
2534 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2535 else
2536        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2537 }
2538 }
2539
2540 // Update chain and glue.
2541 RetOps[0] = Chain;
2542 if (Flag.getNode())
2543 RetOps.push_back(Flag);
2544
2545 // CPUs which aren't M-class use a special sequence to return from
2546 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2547 // though we use "subs pc, lr, #N").
2548 //
2549 // M-class CPUs actually use a normal return sequence with a special
2550 // (hardware-provided) value in LR, so the normal code path works.
2551 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2552 !Subtarget->isMClass()) {
2553 if (Subtarget->isThumb1Only())
2554 report_fatal_error("interrupt attribute is not supported in Thumb1");
2555 return LowerInterruptReturn(RetOps, dl, DAG);
2556 }
2557
2558 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2559}
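// --------------------------------------------------------------------------
// Sketch (not part of ARMISelLowering.cpp) of the word selection performed by
// the VMOVRRD-based f64 return above, shown on a plain bit pattern. The helper
// name is invented; it only mirrors the getValue(isLittleEndian ? 0 : 1)
// choice of which half lands in the first return register.
#include <cstdint>
#include <cstring>

static void f64ToReturnRegs(double D, bool IsLittleEndian,
                            uint32_t &FirstReg, uint32_t &SecondReg) {
  uint64_t Bits;
  std::memcpy(&Bits, &D, sizeof(Bits));            // raw bits of the f64 value
  uint32_t Lo = static_cast<uint32_t>(Bits);       // VMOVRRD result value 0
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // VMOVRRD result value 1
  FirstReg  = IsLittleEndian ? Lo : Hi;            // copied to the first GPR
  SecondReg = IsLittleEndian ? Hi : Lo;            // copied to the next GPR
}
// --------------------------------------------------------------------------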
2560
2561bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2562 if (N->getNumValues() != 1)
2563 return false;
2564 if (!N->hasNUsesOfValue(1, 0))
2565 return false;
2566
2567 SDValue TCChain = Chain;
2568 SDNode *Copy = *N->use_begin();
2569 if (Copy->getOpcode() == ISD::CopyToReg) {
2570 // If the copy has a glue operand, we conservatively assume it isn't safe to
2571 // perform a tail call.
2572 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2573 return false;
2574 TCChain = Copy->getOperand(0);
2575 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2576 SDNode *VMov = Copy;
2577 // f64 returned in a pair of GPRs.
2578 SmallPtrSet<SDNode*, 2> Copies;
2579 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2580 UI != UE; ++UI) {
2581 if (UI->getOpcode() != ISD::CopyToReg)
2582 return false;
2583 Copies.insert(*UI);
2584 }
2585 if (Copies.size() > 2)
2586 return false;
2587
2588 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2589 UI != UE; ++UI) {
2590 SDValue UseChain = UI->getOperand(0);
2591 if (Copies.count(UseChain.getNode()))
2592 // Second CopyToReg
2593 Copy = *UI;
2594 else {
2595 // We are at the top of this chain.
2596 // If the copy has a glue operand, we conservatively assume it
2597 // isn't safe to perform a tail call.
2598 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2599 return false;
2600 // First CopyToReg
2601 TCChain = UseChain;
2602 }
2603 }
2604 } else if (Copy->getOpcode() == ISD::BITCAST) {
2605 // f32 returned in a single GPR.
2606 if (!Copy->hasOneUse())
2607 return false;
2608 Copy = *Copy->use_begin();
2609 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2610 return false;
2611 // If the copy has a glue operand, we conservatively assume it isn't safe to
2612 // perform a tail call.
2613 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2614 return false;
2615 TCChain = Copy->getOperand(0);
2616 } else {
2617 return false;
2618 }
2619
2620 bool HasRet = false;
2621 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2622 UI != UE; ++UI) {
2623 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2624 UI->getOpcode() != ARMISD::INTRET_FLAG)
2625 return false;
2626 HasRet = true;
2627 }
2628
2629 if (!HasRet)
2630 return false;
2631
2632 Chain = TCChain;
2633 return true;
2634}
2635
2636bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2637 if (!Subtarget->supportsTailCall())
2638 return false;
2639
2640 auto Attr =
2641 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2642 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2643 return false;
2644
2645 return true;
2646}
2647
2648// Writing a 64-bit value, so we need to split it into two 32-bit values first
2649// and pass the low and high parts through.
2650static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2651 SDLoc DL(Op);
2652 SDValue WriteValue = Op->getOperand(2);
2653
2654 // This function is only supposed to be called for i64 type argument.
2655  assert(WriteValue.getValueType() == MVT::i64
2656         && "LowerWRITE_REGISTER called for non-i64 type argument.");
2657
2658 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2659 DAG.getConstant(0, DL, MVT::i32));
2660 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2661 DAG.getConstant(1, DL, MVT::i32));
2662 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2663 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2664}
2665
2666// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2667// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2668// one of the above mentioned nodes. It has to be wrapped because otherwise
2669// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2670// be used to form addressing mode. These wrapped nodes will be selected
2671// into MOVi.
2672SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2673 SelectionDAG &DAG) const {
2674 EVT PtrVT = Op.getValueType();
2675 // FIXME there is no actual debug info here
2676 SDLoc dl(Op);
2677 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2678 SDValue Res;
2679
2680 // When generating execute-only code Constant Pools must be promoted to the
2681 // global data section. It's a bit ugly that we can't share them across basic
2682  // blocks, but this way we guarantee that execute-only behaves correctly with
2683 // position-independent addressing modes.
2684 if (Subtarget->genExecuteOnly()) {
2685 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2686 auto T = const_cast<Type*>(CP->getType());
2687 auto C = const_cast<Constant*>(CP->getConstVal());
2688 auto M = const_cast<Module*>(DAG.getMachineFunction().
2689 getFunction()->getParent());
2690 auto GV = new GlobalVariable(
2691 *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2692 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2693 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2694 Twine(AFI->createPICLabelUId())
2695 );
2696 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2697 dl, PtrVT);
2698 return LowerGlobalAddress(GA, DAG);
2699 }
2700
2701 if (CP->isMachineConstantPoolEntry())
2702 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2703 CP->getAlignment());
2704 else
2705 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2706 CP->getAlignment());
2707 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2708}
2709
2710unsigned ARMTargetLowering::getJumpTableEncoding() const {
2711 return MachineJumpTableInfo::EK_Inline;
2712}
2713
2714SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2715 SelectionDAG &DAG) const {
2716 MachineFunction &MF = DAG.getMachineFunction();
2717 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2718 unsigned ARMPCLabelIndex = 0;
2719 SDLoc DL(Op);
2720 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2721 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2722 SDValue CPAddr;
2723 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2724 if (!IsPositionIndependent) {
2725 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2726 } else {
2727 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2728 ARMPCLabelIndex = AFI->createPICLabelUId();
2729 ARMConstantPoolValue *CPV =
2730 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2731 ARMCP::CPBlockAddress, PCAdj);
2732 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2733 }
2734 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2735 SDValue Result = DAG.getLoad(
2736 PtrVT, DL, DAG.getEntryNode(), CPAddr,
2737 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2738 if (!IsPositionIndependent)
2739 return Result;
2740 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2741 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2742}
2743
2744/// \brief Convert a TLS address reference into the correct sequence of loads
2745/// and calls to compute the variable's address for Darwin, and return an
2746/// SDValue containing the final node.
2747
2748/// Darwin only has one TLS scheme which must be capable of dealing with the
2749/// fully general situation, in the worst case. This means:
2750/// + "extern __thread" declaration.
2751/// + Defined in a possibly unknown dynamic library.
2752///
2753/// The general system is that each __thread variable has a [3 x i32] descriptor
2754/// which contains information used by the runtime to calculate the address. The
2755/// only part of this the compiler needs to know about is the first word, which
2756/// contains a function pointer that must be called with the address of the
2757/// entire descriptor in "r0".
2758///
2759/// Since this descriptor may be in a different unit, in general access must
2760/// proceed along the usual ARM rules. A common sequence to produce is:
2761///
2762/// movw rT1, :lower16:_var$non_lazy_ptr
2763/// movt rT1, :upper16:_var$non_lazy_ptr
2764/// ldr r0, [rT1]
2765/// ldr rT2, [r0]
2766/// blx rT2
2767/// [...address now in r0...]
2768SDValue
2769ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2770 SelectionDAG &DAG) const {
2771  assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
2772 SDLoc DL(Op);
2773
2774  // The first step is to get the address of the actual global symbol. This is where
2775 // the TLS descriptor lives.
2776 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2777
2778 // The first entry in the descriptor is a function pointer that we must call
2779 // to obtain the address of the variable.
2780 SDValue Chain = DAG.getEntryNode();
2781 SDValue FuncTLVGet = DAG.getLoad(
2782 MVT::i32, DL, Chain, DescAddr,
2783 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2784 /* Alignment = */ 4,
2785 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2786 MachineMemOperand::MOInvariant);
2787 Chain = FuncTLVGet.getValue(1);
2788
2789 MachineFunction &F = DAG.getMachineFunction();
2790 MachineFrameInfo &MFI = F.getFrameInfo();
2791 MFI.setAdjustsStack(true);
2792
2793 // TLS calls preserve all registers except those that absolutely must be
2794 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2795 // silly).
2796 auto TRI =
2797 getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2798 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2799 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2800
2801 // Finally, we can make the call. This is just a degenerate version of a
2802  // normal ARM call node: r0 takes the address of the descriptor, and
2803 // returns the address of the variable in this thread.
2804 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2805 Chain =
2806 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2807 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2808 DAG.getRegisterMask(Mask), Chain.getValue(1));
2809 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2810}
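// --------------------------------------------------------------------------
// Rough C++ model (illustration only) of the Darwin TLS access lowered above.
// The struct layout and names are assumptions for the sketch; the compiler
// only relies on the first word of the [3 x i32] descriptor being a getter
// that is called with the descriptor's own address in r0.
#include <cstdint>

struct TLVDescriptor {
  void *(*Getter)(TLVDescriptor *); // word 0: runtime-provided thunk
  uintptr_t Key;                    // words 1-2: opaque to the compiler
  uintptr_t Offset;
};

static void *darwinTLSAddress(TLVDescriptor *Desc) {
  return Desc->Getter(Desc);        // the "blx rT2" above, with r0 = Desc
}
// --------------------------------------------------------------------------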
2811
2812SDValue
2813ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2814 SelectionDAG &DAG) const {
2815  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2816
2817 SDValue Chain = DAG.getEntryNode();
2818 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2819 SDLoc DL(Op);
2820
2821 // Load the current TEB (thread environment block)
2822 SDValue Ops[] = {Chain,
2823 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2824 DAG.getConstant(15, DL, MVT::i32),
2825 DAG.getConstant(0, DL, MVT::i32),
2826 DAG.getConstant(13, DL, MVT::i32),
2827 DAG.getConstant(0, DL, MVT::i32),
2828 DAG.getConstant(2, DL, MVT::i32)};
2829 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2830 DAG.getVTList(MVT::i32, MVT::Other), Ops);
2831
2832 SDValue TEB = CurrentTEB.getValue(0);
2833 Chain = CurrentTEB.getValue(1);
2834
2835 // Load the ThreadLocalStoragePointer from the TEB
2836 // A pointer to the TLS array is located at offset 0x2c from the TEB.
2837 SDValue TLSArray =
2838 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2839 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2840
2841 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2842 // offset into the TLSArray.
2843
2844 // Load the TLS index from the C runtime
2845 SDValue TLSIndex =
2846 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2847 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2848 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2849
2850 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2851 DAG.getConstant(2, DL, MVT::i32));
2852 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2853 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2854 MachinePointerInfo());
2855
2856 // Get the offset of the start of the .tls section (section base)
2857 const auto *GA = cast<GlobalAddressSDNode>(Op);
2858 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2859 SDValue Offset = DAG.getLoad(
2860 PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2861 DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2862 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2863
2864 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2865}
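// --------------------------------------------------------------------------
// Rough model (illustration only) of the address computed above. read_teb()
// and the function name are invented stand-ins; the real code reads the TEB
// via the MRC p15 coprocessor access issued with Intrinsic::arm_mrc.
#include <cstdint>

extern "C" uintptr_t read_teb();   // stand-in for the MRC-based TEB read

static uintptr_t windowsTLSAddress(uint32_t TLSIndex, uintptr_t SecRelOffset) {
  uintptr_t TEB = read_teb();
  // ThreadLocalStoragePointer lives at offset 0x2c in the TEB.
  uintptr_t *TLSArray = *reinterpret_cast<uintptr_t **>(TEB + 0x2c);
  // Index by the module's _tls_index (a 4-byte scale on 32-bit ARM).
  uintptr_t TLSBlock = TLSArray[TLSIndex];
  // Add the variable's section-relative (SECREL) offset within .tls.
  return TLSBlock + SecRelOffset;
}
// --------------------------------------------------------------------------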
2866
2867// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2868SDValue
2869ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2870 SelectionDAG &DAG) const {
2871 SDLoc dl(GA);
2872 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2873 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2874 MachineFunction &MF = DAG.getMachineFunction();
2875 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2876 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2877 ARMConstantPoolValue *CPV =
2878 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2879 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2880 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2881 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2882 Argument = DAG.getLoad(
2883 PtrVT, dl, DAG.getEntryNode(), Argument,
2884 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2885 SDValue Chain = Argument.getValue(1);
2886
2887 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2888 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2889
2890 // call __tls_get_addr.
2891 ArgListTy Args;
2892 ArgListEntry Entry;
2893 Entry.Node = Argument;
2894 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2895 Args.push_back(Entry);
2896
2897 // FIXME: is there useful debug info available here?
2898 TargetLowering::CallLoweringInfo CLI(DAG);
2899 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2900 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2901 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2902
2903 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2904 return CallResult.first;
2905}
2906
2907// Lower ISD::GlobalTLSAddress using the "initial exec" or
2908// "local exec" model.
2909SDValue
2910ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2911 SelectionDAG &DAG,
2912 TLSModel::Model model) const {
2913 const GlobalValue *GV = GA->getGlobal();
2914 SDLoc dl(GA);
2915 SDValue Offset;
2916 SDValue Chain = DAG.getEntryNode();
2917 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2918 // Get the Thread Pointer
2919 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2920
2921 if (model == TLSModel::InitialExec) {
2922 MachineFunction &MF = DAG.getMachineFunction();
2923 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2924 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2925 // Initial exec model.
2926 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2927 ARMConstantPoolValue *CPV =
2928 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2929 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2930 true);
2931 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2932 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2933 Offset = DAG.getLoad(
2934 PtrVT, dl, Chain, Offset,
2935 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2936 Chain = Offset.getValue(1);
2937
2938 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2939 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2940
2941 Offset = DAG.getLoad(
2942 PtrVT, dl, Chain, Offset,
2943 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2944 } else {
2945 // local exec model
2946    assert(model == TLSModel::LocalExec);
2947 ARMConstantPoolValue *CPV =
2948 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2949 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2950 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2951 Offset = DAG.getLoad(
2952 PtrVT, dl, Chain, Offset,
2953 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2954 }
2955
2956 // The address of the thread local variable is the add of the thread
2957 // pointer with the offset of the variable.
2958 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2959}
2960
2961SDValue
2962ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2963 if (Subtarget->isTargetDarwin())
2964 return LowerGlobalTLSAddressDarwin(Op, DAG);
2965
2966 if (Subtarget->isTargetWindows())
2967 return LowerGlobalTLSAddressWindows(Op, DAG);
2968
2969 // TODO: implement the "local dynamic" model
2970  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2971 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2972 if (DAG.getTarget().Options.EmulatedTLS)
2973 return LowerToTLSEmulatedModel(GA, DAG);
2974
2975 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2976
2977 switch (model) {
2978 case TLSModel::GeneralDynamic:
2979 case TLSModel::LocalDynamic:
2980 return LowerToTLSGeneralDynamicModel(GA, DAG);
2981 case TLSModel::InitialExec:
2982 case TLSModel::LocalExec:
2983 return LowerToTLSExecModels(GA, DAG, model);
2984 }
2985  llvm_unreachable("bogus TLS model");
2986}
2987
2988/// Return true if all users of V are within function F, looking through
2989/// ConstantExprs.
2990static bool allUsersAreInFunction(const Value *V, const Function *F) {
2991 SmallVector<const User*,4> Worklist;
2992 for (auto *U : V->users())
2993 Worklist.push_back(U);
2994 while (!Worklist.empty()) {
2995 auto *U = Worklist.pop_back_val();
2996 if (isa<ConstantExpr>(U)) {
2997 for (auto *UU : U->users())
2998 Worklist.push_back(UU);
2999 continue;
3000 }
3001
3002 auto *I = dyn_cast<Instruction>(U);
3003 if (!I || I->getParent()->getParent() != F)
3004 return false;
3005 }
3006 return true;
3007}
3008
3009/// Return true if all users of V are within some (any) function, looking through
3010/// ConstantExprs. In other words, return false if there are any global constant users.
3011static bool allUsersAreInFunctions(const Value *V) {
3012 SmallVector<const User*,4> Worklist;
3013 for (auto *U : V->users())
3014 Worklist.push_back(U);
3015 while (!Worklist.empty()) {
3016 auto *U = Worklist.pop_back_val();
3017 if (isa<ConstantExpr>(U)) {
3018 for (auto *UU : U->users())
3019 Worklist.push_back(UU);
3020 continue;
3021 }
3022
3023 if (!isa<Instruction>(U))
3024 return false;
3025 }
3026 return true;
3027}
3028
3029// Return true if T is an integer, float or an array/vector of either.
3030static bool isSimpleType(Type *T) {
3031 if (T->isIntegerTy() || T->isFloatingPointTy())
3032 return true;
3033 Type *SubT = nullptr;
3034 if (T->isArrayTy())
3035 SubT = T->getArrayElementType();
3036 else if (T->isVectorTy())
3037 SubT = T->getVectorElementType();
3038 else
3039 return false;
3040 return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3041}
3042
3043static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3044 EVT PtrVT, const SDLoc &dl) {
3045 // If we're creating a pool entry for a constant global with unnamed address,
3046 // and the global is small enough, we can emit it inline into the constant pool
3047 // to save ourselves an indirection.
3048 //
3049 // This is a win if the constant is only used in one function (so it doesn't
3050 // need to be duplicated) or duplicating the constant wouldn't increase code
3051 // size (implying the constant is no larger than 4 bytes).
3052 const Function *F = DAG.getMachineFunction().getFunction();
3053
3054  // We rely on this decision to inline being idempotent and unrelated to the
3055 // use-site. We know that if we inline a variable at one use site, we'll
3056 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3057  // doesn't know about this optimization, so bail out if it's enabled, else
3058  // we could decide to inline here (and thus never emit the GV) while still
3059  // requiring the GV from fast-isel generated code.
3060 if (!EnableConstpoolPromotion ||
3061 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3062 return SDValue();
3063
3064 auto *GVar = dyn_cast<GlobalVariable>(GV);
3065 if (!GVar || !GVar->hasInitializer() ||
3066 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3067 !GVar->hasLocalLinkage())
3068 return SDValue();
3069
3070 // Ensure that we don't try and inline any type that contains pointers. If
3071 // we inline a value that contains relocations, we move the relocations from
3072 // .data to .text which is not ideal.
3073 auto *Init = GVar->getInitializer();
3074 if (!isSimpleType(Init->getType()))
3075 return SDValue();
3076
3077 // The constant islands pass can only really deal with alignment requests
3078 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3079 // any type wanting greater alignment requirements than 4 bytes. We also
3080 // can only promote constants that are multiples of 4 bytes in size or
3081 // are paddable to a multiple of 4. Currently we only try and pad constants
3082 // that are strings for simplicity.
3083 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3084 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3085 unsigned Align = GVar->getAlignment();
3086 unsigned RequiredPadding = 4 - (Size % 4);
3087 bool PaddingPossible =
3088 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3089 if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3090 Size == 0)
3091 return SDValue();
3092
3093 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3094 MachineFunction &MF = DAG.getMachineFunction();
3095 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3096
3097 // We can't bloat the constant pool too much, else the ConstantIslands pass
3098 // may fail to converge. If we haven't promoted this global yet (it may have
3099 // multiple uses), and promoting it would increase the constant pool size (Sz
3100 // > 4), ensure we have space to do so up to MaxTotal.
3101 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3102 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3103 ConstpoolPromotionMaxTotal)
3104 return SDValue();
3105
3106 // This is only valid if all users are in a single function OR it has users
3107  // in multiple functions but is no larger than a pointer. We also check if
3108 // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3109 // address taken.
3110 if (!allUsersAreInFunction(GVar, F) &&
3111 !(Size <= 4 && allUsersAreInFunctions(GVar)))
3112 return SDValue();
3113
3114 // We're going to inline this global. Pad it out if needed.
3115 if (RequiredPadding != 4) {
3116 StringRef S = CDAInit->getAsString();
3117
3118 SmallVector<uint8_t,16> V(S.size());
3119 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3120 while (RequiredPadding--)
3121 V.push_back(0);
3122 Init = ConstantDataArray::get(*DAG.getContext(), V);
3123 }
3124
3125 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3126 SDValue CPAddr =
3127 DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3128 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3129 AFI->markGlobalAsPromotedToConstantPool(GVar);
3130 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3131 PaddedSize - 4);
3132 }
3133 ++NumConstpoolPromoted;
3134 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3135}
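// --------------------------------------------------------------------------
// Standalone sketch (not part of ARMISelLowering.cpp) of the padding rule used
// by promoteToConstantPool above: string initializers are zero-padded so their
// size becomes a multiple of 4 before being placed in the constant pool. The
// helper name is invented.
#include <cstdint>
#include <string>
#include <vector>

static std::vector<uint8_t> padToWordMultiple(const std::string &S) {
  std::vector<uint8_t> V(S.begin(), S.end());
  unsigned RequiredPadding = 4 - (V.size() % 4);
  if (RequiredPadding != 4)          // already a multiple of 4 when this is 4
    while (RequiredPadding--)
      V.push_back(0);
  return V;
}
// --------------------------------------------------------------------------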
3136
3137static bool isReadOnly(const GlobalValue *GV) {
3138 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3139 GV = GA->getBaseObject();
3140 return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3141 isa<Function>(GV);
3142}
3143
3144SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3145 SelectionDAG &DAG) const {
3146 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3147  default: llvm_unreachable("unknown object format");
3148 case Triple::COFF:
3149 return LowerGlobalAddressWindows(Op, DAG);
3150 case Triple::ELF:
3151 return LowerGlobalAddressELF(Op, DAG);
3152 case Triple::MachO:
3153 return LowerGlobalAddressDarwin(Op, DAG);
3154 }
3155}
3156
3157SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3158 SelectionDAG &DAG) const {
3159 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3160 SDLoc dl(Op);
3161 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3162 const TargetMachine &TM = getTargetMachine();
3163 bool IsRO = isReadOnly(GV);
3164
3165 // promoteToConstantPool only if not generating XO text section
3166 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3167 if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3168 return V;
3169
3170 if (isPositionIndependent()) {
3171 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3172
3173 MachineFunction &MF = DAG.getMachineFunction();
3174 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3175 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3176 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3177 SDLoc dl(Op);
3178 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3179 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
3180 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
3181 UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3182 /*AddCurrentAddress=*/UseGOT_PREL);
3183 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3184 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3185 SDValue Result = DAG.getLoad(
3186 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3187 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3188 SDValue Chain = Result.getValue(1);
3189 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3190 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3191 if (UseGOT_PREL)
3192 Result =
3193 DAG.getLoad(PtrVT, dl, Chain, Result,
3194 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3195 return Result;
3196 } else if (Subtarget->isROPI() && IsRO) {
3197 // PC-relative.
3198 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3199 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3200 return Result;
3201 } else if (Subtarget->isRWPI() && !IsRO) {
3202 // SB-relative.
3203 SDValue RelAddr;
3204 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3205 ++NumMovwMovt;
3206 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3207 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3208 } else { // use literal pool for address constant
3209 ARMConstantPoolValue *CPV =
3210 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3211 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3212 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3213 RelAddr = DAG.getLoad(
3214 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3215 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3216 }
3217 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3218 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3219 return Result;
3220 }
3221
3222 // If we have T2 ops, we can materialize the address directly via movt/movw
3223 // pair. This is always cheaper.
3224 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3225 ++NumMovwMovt;
3226 // FIXME: Once remat is capable of dealing with instructions with register
3227 // operands, expand this into two nodes.
3228 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3229 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3230 } else {
3231 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3232 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3233 return DAG.getLoad(
3234 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3235 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3236 }
3237}
3238
3239SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3240 SelectionDAG &DAG) const {
3241  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3242         "ROPI/RWPI not currently supported for Darwin");
3243 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3244 SDLoc dl(Op);
3245 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3246
3247 if (Subtarget->useMovt(DAG.getMachineFunction()))
3248 ++NumMovwMovt;
3249
3250 // FIXME: Once remat is capable of dealing with instructions with register
3251 // operands, expand this into multiple nodes
3252 unsigned Wrapper =
3253 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3254
3255 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3256 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3257
3258 if (Subtarget->isGVIndirectSymbol(GV))
3259 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3260 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3261 return Result;
3262}
3263
3264SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3265 SelectionDAG &DAG) const {
3266  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3267  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3268         "Windows on ARM expects to use movw/movt");
3269  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3270         "ROPI/RWPI not currently supported for Windows");
3271
3272 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3273 const ARMII::TOF TargetFlags =
3274 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3275 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3276 SDValue Result;
3277 SDLoc DL(Op);
3278
3279 ++NumMovwMovt;
3280
3281 // FIXME: Once remat is capable of dealing with instructions with register
3282 // operands, expand this into two nodes.
3283 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3284 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3285 TargetFlags));
3286 if (GV->hasDLLImportStorageClass())
3287 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3288 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3289 return Result;
3290}
3291
3292SDValue
3293ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3294 SDLoc dl(Op);
3295 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3296 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3297 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3298 Op.getOperand(1), Val);
3299}
3300
3301SDValue
3302ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3303 SDLoc dl(Op);
3304 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3305 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3306}
3307
3308SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3309 SelectionDAG &DAG) const {
3310 SDLoc dl(Op);
3311 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3312 Op.getOperand(0));
3313}
3314
3315SDValue
3316ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3317 const ARMSubtarget *Subtarget) const {
3318 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3319 SDLoc dl(Op);
3320 switch (IntNo) {
3321 default: return SDValue(); // Don't custom lower most intrinsics.
3322 case Intrinsic::thread_pointer: {
3323 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3324 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3325 }
3326 case Intrinsic::eh_sjlj_lsda: {
3327 MachineFunction &MF = DAG.getMachineFunction();
3328 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3329 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3330 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3331 SDValue CPAddr;
3332 bool IsPositionIndependent = isPositionIndependent();
3333 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3334 ARMConstantPoolValue *CPV =
3335 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3336 ARMCP::CPLSDA, PCAdj);
3337 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3338 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3339 SDValue Result = DAG.getLoad(
3340 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3341 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3342
3343 if (IsPositionIndependent) {
3344 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3345 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3346 }
3347 return Result;
3348 }
3349 case Intrinsic::arm_neon_vabs:
3350 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3351 Op.getOperand(1));
3352 case Intrinsic::arm_neon_vmulls:
3353 case Intrinsic::arm_neon_vmullu: {
3354 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3355 ? ARMISD::VMULLs : ARMISD::VMULLu;
3356 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3357 Op.getOperand(1), Op.getOperand(2));
3358 }
3359 case Intrinsic::arm_neon_vminnm:
3360 case Intrinsic::arm_neon_vmaxnm: {
3361 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3362 ? ISD::FMINNUM : ISD::FMAXNUM;
3363 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3364 Op.getOperand(1), Op.getOperand(2));
3365 }
3366 case Intrinsic::arm_neon_vminu:
3367 case Intrinsic::arm_neon_vmaxu: {
3368 if (Op.getValueType().isFloatingPoint())
3369 return SDValue();
3370 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3371 ? ISD::UMIN : ISD::UMAX;
3372 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3373 Op.getOperand(1), Op.getOperand(2));
3374 }
3375 case Intrinsic::arm_neon_vmins:
3376 case Intrinsic::arm_neon_vmaxs: {
3377 // v{min,max}s is overloaded between signed integers and floats.
3378 if (!Op.getValueType().isFloatingPoint()) {
3379 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3380 ? ISD::SMIN : ISD::SMAX;
3381 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3382 Op.getOperand(1), Op.getOperand(2));
3383 }
3384 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3385 ? ISD::FMINNAN : ISD::FMAXNAN;
3386 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3387 Op.getOperand(1), Op.getOperand(2));
3388 }
3389 case Intrinsic::arm_neon_vtbl1:
3390 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3391 Op.getOperand(1), Op.getOperand(2));
3392 case Intrinsic::arm_neon_vtbl2:
3393 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3394 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3395 }
3396}
3397
3398static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3399 const ARMSubtarget *Subtarget) {
3400 SDLoc dl(Op);
3401 ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2));
3402 auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue());
3403 if (Scope == SynchronizationScope::SingleThread)
3404 return Op;
3405
3406 if (!Subtarget->hasDataBarrier()) {
3407 // Some ARMv6 cpus can support data barriers with an mcr instruction.
3408 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3409 // here.
3410    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3411           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3412 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3413 DAG.getConstant(0, dl, MVT::i32));
3414 }
3415
3416 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3417 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3418 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3419 if (Subtarget->isMClass()) {
3420 // Only a full system barrier exists in the M-class architectures.
3421 Domain = ARM_MB::SY;
3422 } else if (Subtarget->preferISHSTBarriers() &&
3423 Ord == AtomicOrdering::Release) {
3424 // Swift happens to implement ISHST barriers in a way that's compatible with
3425 // Release semantics but weaker than ISH so we'd be fools not to use
3426 // it. Beware: other processors probably don't!
3427 Domain = ARM_MB::ISHST;
3428 }
3429
3430 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3431 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3432 DAG.getConstant(Domain, dl, MVT::i32));
3433}
3434
3435static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3436 const ARMSubtarget *Subtarget) {
3437  // ARM pre v5TE and Thumb1 do not have preload instructions.
3438 if (!(Subtarget->isThumb2() ||
3439 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3440 // Just preserve the chain.
3441 return Op.getOperand(0);
3442
3443 SDLoc dl(Op);
3444 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3445 if (!isRead &&
3446 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3447 // ARMv7 with MP extension has PLDW.
3448 return Op.getOperand(0);
3449
3450 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3451 if (Subtarget->isThumb()) {
3452 // Invert the bits.
3453 isRead = ~isRead & 1;
3454 isData = ~isData & 1;
3455 }
3456
3457 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3458 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3459 DAG.getConstant(isData, dl, MVT::i32));
3460}
3461
3462static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3463 MachineFunction &MF = DAG.getMachineFunction();
3464 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3465
3466 // vastart just stores the address of the VarArgsFrameIndex slot into the
3467 // memory location argument.
3468 SDLoc dl(Op);
3469 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3470 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3471 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3472 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3473 MachinePointerInfo(SV));
3474}
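// --------------------------------------------------------------------------
// Illustration (not part of ARMISelLowering.cpp): the kind of variadic
// function whose va_start is handled by LowerVASTART above; va_start simply
// records the address of the VarArgsFrameIndex slot into the va_list. The
// function name is invented.
#include <cstdarg>

static int sumInts(int Count, ...) {
  va_list AP;
  va_start(AP, Count);          // lowered through ISD::VASTART -> LowerVASTART
  int Total = 0;
  for (int I = 0; I < Count; ++I)
    Total += va_arg(AP, int);   // reads successive 4-byte slots from there
  va_end(AP);
  return Total;
}
// --------------------------------------------------------------------------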
3475
3476SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3477 CCValAssign &NextVA,
3478 SDValue &Root,
3479 SelectionDAG &DAG,
3480 const SDLoc &dl) const {
3481 MachineFunction &MF = DAG.getMachineFunction();
3482 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3483
3484 const TargetRegisterClass *RC;
3485 if (AFI->isThumb1OnlyFunction())
3486 RC = &ARM::tGPRRegClass;
3487 else
3488 RC = &ARM::GPRRegClass;
3489
3490 // Transform the arguments stored in physical registers into virtual ones.
3491 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3492 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3493
3494 SDValue ArgValue2;
3495 if (NextVA.isMemLoc()) {
3496 MachineFrameInfo &MFI = MF.getFrameInfo();
3497 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3498
3499 // Create load node to retrieve arguments from the stack.
3500 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3501 ArgValue2 = DAG.getLoad(
3502 MVT::i32, dl, Root, FIN,
3503 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3504 } else {
3505 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3506 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3507 }
3508 if (!Subtarget->isLittle())
3509 std::swap (ArgValue, ArgValue2);
3510 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3511}
3512
3513// The remaining GPRs hold either the beginning of variable-argument
3514// data, or the beginning of an aggregate passed by value (usually
3515// byval). Either way, we allocate stack slots adjacent to the data
3516// provided by our caller, and store the unallocated registers there.
3517// If this is a variadic function, the va_list pointer will begin with
3518// these values; otherwise, this reassembles a (byval) structure that
3519// was split between registers and memory.
3520// Return: The frame index registers were stored into.
3521int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3522 const SDLoc &dl, SDValue &Chain,
3523 const Value *OrigArg,
3524 unsigned InRegsParamRecordIdx,
3525 int ArgOffset, unsigned ArgSize) const {
3526  // Currently, two use-cases are possible:
3527  // Case #1. Non-var-args function, and we meet the first byval parameter.
3528  //          Set up the first unallocated register as the first byval
3529  //          register; eat all remaining registers
3530  //          (these two actions are performed by the HandleByVal method).
3531  //          Then, here, we initialize the stack frame with
3532  //          "store-reg" instructions.
3533  // Case #2. Var-args function that doesn't contain byval parameters.
3534  //          The same: eat all remaining unallocated registers and
3535  //          initialize the stack frame.
3536
3537 MachineFunction &MF = DAG.getMachineFunction();
3538 MachineFrameInfo &MFI = MF.getFrameInfo();
3539 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3540 unsigned RBegin, REnd;
3541 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3542 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3543 } else {
3544 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3545 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3546 REnd = ARM::R4;
3547 }
3548
3549 if (REnd != RBegin)
3550 ArgOffset = -4 * (ARM::R4 - RBegin);
3551
3552 auto PtrVT = getPointerTy(DAG.getDataLayout());
3553 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3554 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3555
3556 SmallVector<SDValue, 4> MemOps;
3557 const TargetRegisterClass *RC =
3558 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3559
3560 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3561 unsigned VReg = MF.addLiveIn(Reg, RC);
3562 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3563 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3564 MachinePointerInfo(OrigArg, 4 * i));
3565 MemOps.push_back(Store);
3566 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3567 }
3568
3569 if (!MemOps.empty())
3570 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3571 return FrameIndex;
3572}
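// --------------------------------------------------------------------------
// Worked example (illustration only) of the offsets computed above: if the
// first unallocated GPR is r2, then RBegin = r2 and REnd = r4, so r2 and r3
// are spilled and the fixed object is created at
//   ArgOffset = -4 * (r4 - r2) = -8
// i.e. 8 bytes below the incoming stack arguments, keeping register-passed
// and stack-passed data contiguous for va_arg / byval reassembly.
// --------------------------------------------------------------------------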
3573
3574// Set up the stack frame that the va_list pointer will start from.
3575void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3576 const SDLoc &dl, SDValue &Chain,
3577 unsigned ArgOffset,
3578 unsigned TotalArgRegsSaveSize,
3579 bool ForceMutable) const {
3580 MachineFunction &MF = DAG.getMachineFunction();
3581 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3582
3583 // Try to store any remaining integer argument regs
3584 // to their spots on the stack so that they may be loaded by dereferencing
3585 // the result of va_next.
3586  // If there are no regs to be stored, just point the address after the last
3587  // argument passed via the stack.
3588 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3589 CCInfo.getInRegsParamsCount(),
3590 CCInfo.getNextStackOffset(), 4);
3591 AFI->setVarArgsFrameIndex(FrameIndex);
3592}
3593
3594SDValue ARMTargetLowering::LowerFormalArguments(
3595 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3596 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3597 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3598 MachineFunction &MF = DAG.getMachineFunction();
3599 MachineFrameInfo &MFI = MF.getFrameInfo();
3600
3601 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3602
3603 // Assign locations to all of the incoming arguments.
3604 SmallVector<CCValAssign, 16> ArgLocs;
3605 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3606 *DAG.getContext());
3607 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3608
3609 SmallVector<SDValue, 16> ArgValues;
3610 SDValue ArgValue;
3611 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3612 unsigned CurArgIdx = 0;
3613
3614 // Initially ArgRegsSaveSize is zero.
3615  // Then we increase this value each time we meet a byval parameter.
3616  // We also increase this value in the case of a varargs function.
3617 AFI->setArgRegsSaveSize(0);
3618
3619 // Calculate the amount of stack space that we need to allocate to store
3620 // byval and variadic arguments that are passed in registers.
3621 // We need to know this before we allocate the first byval or variadic
3622 // argument, as they will be allocated a stack slot below the CFA (Canonical
3623 // Frame Address, the stack pointer at entry to the function).
3624 unsigned ArgRegBegin = ARM::R4;
3625 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3626 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3627 break;
3628
3629 CCValAssign &VA = ArgLocs[i];
3630 unsigned Index = VA.getValNo();
3631 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3632 if (!Flags.isByVal())
3633 continue;
3634
3635    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3636 unsigned RBegin, REnd;
3637 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3638 ArgRegBegin = std::min(ArgRegBegin, RBegin);
3639
3640 CCInfo.nextInRegsParam();
3641 }
3642 CCInfo.rewindByValRegsInfo();
3643
3644 int lastInsIndex = -1;
3645 if (isVarArg && MFI.hasVAStart()) {
3646 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3647 if (RegIdx != array_lengthof(GPRArgRegs))
3648 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3649 }
3650
3651 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3652 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3653 auto PtrVT = getPointerTy(DAG.getDataLayout());
3654
3655 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3656 CCValAssign &VA = ArgLocs[i];
3657 if (Ins[VA.getValNo()].isOrigArg()) {
3658 std::advance(CurOrigArg,
3659 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3660 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3661 }
3662 // Arguments stored in registers.
3663 if (VA.isRegLoc()) {
3664 EVT RegVT = VA.getLocVT();
3665
3666 if (VA.needsCustom()) {
3667 // f64 and vector types are split up into multiple registers or
3668 // combinations of registers and stack slots.
3669 if (VA.getLocVT() == MVT::v2f64) {
3670 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3671 Chain, DAG, dl);
3672 VA = ArgLocs[++i]; // skip ahead to next loc
3673 SDValue ArgValue2;
3674 if (VA.isMemLoc()) {
3675 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3676 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3677 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3678 MachinePointerInfo::getFixedStack(
3679 DAG.getMachineFunction(), FI));
3680 } else {
3681 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3682 Chain, DAG, dl);
3683 }
3684 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3685 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3686 ArgValue, ArgValue1,
3687 DAG.getIntPtrConstant(0, dl));
3688 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3689 ArgValue, ArgValue2,
3690 DAG.getIntPtrConstant(1, dl));
3691 } else
3692 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3693
3694 } else {
3695 const TargetRegisterClass *RC;
3696
3697 if (RegVT == MVT::f32)
3698 RC = &ARM::SPRRegClass;
3699 else if (RegVT == MVT::f64)
3700 RC = &ARM::DPRRegClass;
3701 else if (RegVT == MVT::v2f64)
3702 RC = &ARM::QPRRegClass;
3703 else if (RegVT == MVT::i32)
3704 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3705 : &ARM::GPRRegClass;
3706 else
3707          llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3708
3709 // Transform the arguments in physical registers into virtual ones.
3710 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3711 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3712 }
3713
3714 // If this is an 8 or 16-bit value, it is really passed promoted
3715 // to 32 bits. Insert an assert[sz]ext to capture this, then
3716 // truncate to the right size.
3717 switch (VA.getLocInfo()) {
3718      default: llvm_unreachable("Unknown loc info!");
3719 case CCValAssign::Full: break;
3720 case CCValAssign::BCvt:
3721 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3722 break;
3723 case CCValAssign::SExt:
3724 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3725 DAG.getValueType(VA.getValVT()));
3726 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3727 break;
3728 case CCValAssign::ZExt:
3729 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3730 DAG.getValueType(VA.getValVT()));
3731 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3732 break;
3733 }
3734
3735 InVals.push_back(ArgValue);
3736
3737 } else { // VA.isRegLoc()
3738 // sanity check
3739 assert(VA.isMemLoc());
3740 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3741
3742 int index = VA.getValNo();
3743
3744 // Some Ins[] entries become multiple ArgLoc[] entries.
3745 // Process them only once.
3746 if (index != lastInsIndex)
3747 {
3748 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3749 // FIXME: For now, all byval parameter objects are marked mutable.
3750 // This can be changed with more analysis.
3751 // In case of tail call optimization mark all arguments mutable.
3752 // Since they could be overwritten by lowering of arguments in case of
3753 // a tail call.
3754 if (Flags.isByVal()) {
3755 assert(Ins[index].isOrigArg() &&
3756 "Byval arguments cannot be implicit");
3757 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3758
3759 int FrameIndex = StoreByValRegs(
3760 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3761 VA.getLocMemOffset(), Flags.getByValSize());
3762 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3763 CCInfo.nextInRegsParam();
3764 } else {
3765 unsigned FIOffset = VA.getLocMemOffset();
3766 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3767 FIOffset, true);
3768
3769 // Create load nodes to retrieve arguments from the stack.
3770 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3771 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3772 MachinePointerInfo::getFixedStack(
3773 DAG.getMachineFunction(), FI)));
3774 }
3775 lastInsIndex = index;
3776 }
3777 }
3778 }
3779
3780 // varargs
3781 if (isVarArg && MFI.hasVAStart())
3782 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3783 CCInfo.getNextStackOffset(),
3784 TotalArgRegsSaveSize);
3785
3786 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3787
3788 return Chain;
3789}
3790
3791/// isFloatingPointZero - Return true if this is +0.0.
3792static bool isFloatingPointZero(SDValue Op) {
3793 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3794 return CFP->getValueAPF().isPosZero();
3795 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3796 // Maybe this has already been legalized into the constant pool?
3797 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3798 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3799 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3800 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3801 return CFP->getValueAPF().isPosZero();
3802 }
3803 } else if (Op->getOpcode() == ISD::BITCAST &&
3804 Op->getValueType(0) == MVT::f64) {
3805 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3806 // created by LowerConstantFP().
3807 SDValue BitcastOp = Op->getOperand(0);
3808 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3809 isNullConstant(BitcastOp->getOperand(0)))
3810 return true;
3811 }
3812 return false;
3813}
3814
3815/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3816/// the given operands.
3817SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3818 SDValue &ARMcc, SelectionDAG &DAG,
3819 const SDLoc &dl) const {
3820 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3821 unsigned C = RHSC->getZExtValue();
3822 if (!isLegalICmpImmediate(C)) {
3823 // Constant does not fit, try adjusting it by one?
3824 switch (CC) {
3825 default: break;
3826 case ISD::SETLT:
3827 case ISD::SETGE:
3828 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3829 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3830 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3831 }
3832 break;
3833 case ISD::SETULT:
3834 case ISD::SETUGE:
3835 if (C != 0 && isLegalICmpImmediate(C-1)) {
3836 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3837 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3838 }
3839 break;
3840 case ISD::SETLE:
3841 case ISD::SETGT:
3842 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3843 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3844 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3845 }
3846 break;
3847 case ISD::SETULE:
3848 case ISD::SETUGT:
3849 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3850 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3851 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3852 }
3853 break;
3854 }
3855 }
3856 }
3857
3858 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3859 ARMISD::NodeType CompareType;
3860 switch (CondCode) {
3861 default:
3862 CompareType = ARMISD::CMP;
3863 break;
3864 case ARMCC::EQ:
3865 case ARMCC::NE:
3866 // Uses only Z Flag
3867 CompareType = ARMISD::CMPZ;
3868 break;
3869 }
3870 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3871 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3872}
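
The immediate adjustment above relies on two simple integer identities, guarded against wrap-around exactly as the switch guards against 0x80000000, 0, 0x7fffffff and 0xffffffff. A minimal standalone C++ check of those identities (a sketch, not part of ARMISelLowering.cpp):

#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  // Signed: x < K is the same as x <= K-1, provided K != INT32_MIN.
  for (int32_t X : {-5, 0, 7, 255, 256, 257})
    assert((X < 256) == (X <= 255));
  // Unsigned: x < K is the same as x <= K-1, provided K != 0.
  for (uint32_t X : {0u, 1u, 4095u, 4096u, 4097u})
    assert((X < 4096u) == (X <= 4095u));
  return 0;
}
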
3873
3874 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3875SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3876 SelectionDAG &DAG, const SDLoc &dl,
3877 bool InvalidOnQNaN) const {
3878 assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3879 SDValue Cmp;
3880 SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3881 if (!isFloatingPointZero(RHS))
3882 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3883 else
3884 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3885 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3886}
3887
3888/// duplicateCmp - Glue values can have only one use, so this function
3889/// duplicates a comparison node.
3890SDValue
3891ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3892 unsigned Opc = Cmp.getOpcode();
3893 SDLoc DL(Cmp);
3894 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3895 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3896
3897 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3898 Cmp = Cmp.getOperand(0);
3899 Opc = Cmp.getOpcode();
3900 if (Opc == ARMISD::CMPFP)
3901 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3902 Cmp.getOperand(1), Cmp.getOperand(2));
3903 else {
3904 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3905 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3906 Cmp.getOperand(1));
3907 }
3908 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3909}
3910
3911std::pair<SDValue, SDValue>
3912ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3913 SDValue &ARMcc) const {
3914 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3915
3916 SDValue Value, OverflowCmp;
3917 SDValue LHS = Op.getOperand(0);
3918 SDValue RHS = Op.getOperand(1);
3919 SDLoc dl(Op);
3920
3921 // FIXME: We are currently always generating CMPs because we don't support
3922 // generating CMN through the backend. This is not as good as the natural
3923 // CMP case because it causes a register dependency and cannot be folded
3924 // later.
3925
3926 switch (Op.getOpcode()) {
3927 default:
3928 llvm_unreachable("Unknown overflow instruction!")::llvm::llvm_unreachable_internal("Unknown overflow instruction!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/ARM/ARMISelLowering.cpp"
, 3928)
;
3929 case ISD::SADDO:
3930 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3931 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3932 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3933 break;
3934 case ISD::UADDO:
3935 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3936 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3937 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3938 break;
3939 case ISD::SSUBO:
3940 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3941 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3942 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3943 break;
3944 case ISD::USUBO:
3945 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3946 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3947 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3948 break;
3949 } // switch (...)
3950
3951 return std::make_pair(Value, OverflowCmp);
3952}
3953
3954SDValue
3955ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3956 // Let legalize expand this if it isn't a legal type yet.
3957 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3958 return SDValue();
3959
3960 SDValue Value, OverflowCmp;
3961 SDValue ARMcc;
3962 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3963 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3964 SDLoc dl(Op);
3965 // We use 0 and 1 as false and true values.
3966 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3967 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3968 EVT VT = Op.getValueType();
3969
3970 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3971 ARMcc, CCR, OverflowCmp);
3972
3973 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3974 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3975}
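
The UADDO case above compares the computed sum against one of the addends, which is the standard wrap-around test for unsigned overflow. A small plain-C++ sketch of the same arithmetic fact (not from the LLVM source):

#include <cassert>
#include <cstdint>

// Unsigned addition overflows exactly when the wrapped sum is smaller than
// an addend; this is the test the CMP of Value against LHS encodes.
static bool uaddOverflows(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B; // wraps modulo 2^32
  return Sum < A;
}

int main() {
  assert(!uaddOverflows(1u, 2u));
  assert(uaddOverflows(0xFFFFFFFFu, 1u));
  assert(uaddOverflows(0x80000000u, 0x80000000u));
  return 0;
}
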
3976
3977SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3978 SDValue Cond = Op.getOperand(0);
3979 SDValue SelectTrue = Op.getOperand(1);
3980 SDValue SelectFalse = Op.getOperand(2);
3981 SDLoc dl(Op);
3982 unsigned Opc = Cond.getOpcode();
3983
3984 if (Cond.getResNo() == 1 &&
3985 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3986 Opc == ISD::USUBO)) {
3987 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3988 return SDValue();
3989
3990 SDValue Value, OverflowCmp;
3991 SDValue ARMcc;
3992 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3993 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3994 EVT VT = Op.getValueType();
3995
3996 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3997 OverflowCmp, DAG);
3998 }
3999
4000 // Convert:
4001 //
4002 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4003 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4004 //
4005 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4006 const ConstantSDNode *CMOVTrue =
4007 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4008 const ConstantSDNode *CMOVFalse =
4009 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4010
4011 if (CMOVTrue && CMOVFalse) {
4012 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4013 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4014
4015 SDValue True;
4016 SDValue False;
4017 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4018 True = SelectTrue;
4019 False = SelectFalse;
4020 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4021 True = SelectFalse;
4022 False = SelectTrue;
4023 }
4024
4025 if (True.getNode() && False.getNode()) {
4026 EVT VT = Op.getValueType();
4027 SDValue ARMcc = Cond.getOperand(2);
4028 SDValue CCR = Cond.getOperand(3);
4029 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4030 assert(True.getValueType() == VT);
4031 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4032 }
4033 }
4034 }
4035
4036 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4037 // undefined bits before doing a full-word comparison with zero.
4038 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4039 DAG.getConstant(1, dl, Cond.getValueType()));
4040
4041 return DAG.getSelectCC(dl, Cond,
4042 DAG.getConstant(0, dl, Cond.getValueType()),
4043 SelectTrue, SelectFalse, ISD::SETNE);
4044}
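
The (select (cmov 1, 0, cond), t, f) fold above removes an intermediate boolean materialisation. A standalone sketch of the underlying identity (not from the LLVM source):

#include <cassert>
#include <initializer_list>

// select((cond ? 1 : 0), t, f) is just (cond ? t : f); with the constants
// swapped, the select arms swap as well.
static int selectOnMaterialisedBool(bool Cond, int T, int F) {
  int B = Cond ? 1 : 0; // the inner "cmov 1, 0, cond"
  return B ? T : F;     // the outer select
}

int main() {
  for (bool C : {false, true})
    assert(selectOnMaterialisedBool(C, 10, 20) == (C ? 10 : 20));
  return 0;
}
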
4045
4046static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4047 bool &swpCmpOps, bool &swpVselOps) {
4048 // Start by selecting the GE condition code for opcodes that return true for
4049 // 'equality'
4050 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4051 CC == ISD::SETULE)
4052 CondCode = ARMCC::GE;
4053
4054 // and GT for opcodes that return false for 'equality'.
4055 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4056 CC == ISD::SETULT)
4057 CondCode = ARMCC::GT;
4058
4059 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4060 // to swap the compare operands.
4061 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4062 CC == ISD::SETULT)
4063 swpCmpOps = true;
4064
4065 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4066 // If we have an unordered opcode, we need to swap the operands to the VSEL
4067 // instruction (effectively negating the condition).
4068 //
4069 // This also has the effect of swapping which one of 'less' or 'greater'
4070 // returns true, so we also swap the compare operands. It also switches
4071 // whether we return true for 'equality', so we compensate by picking the
4072 // opposite condition code to our original choice.
4073 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4074 CC == ISD::SETUGT) {
4075 swpCmpOps = !swpCmpOps;
4076 swpVselOps = !swpVselOps;
4077 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4078 }
4079
4080 // 'ordered' is 'anything but unordered', so use the VS condition code and
4081 // swap the VSEL operands.
4082 if (CC == ISD::SETO) {
4083 CondCode = ARMCC::VS;
4084 swpVselOps = true;
4085 }
4086
4087 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4088 // code and swap the VSEL operands.
4089 if (CC == ISD::SETUNE) {
4090 CondCode = ARMCC::EQ;
4091 swpVselOps = true;
4092 }
4093}
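
One worked instance of these constraints (a sketch, not from the LLVM source): SETULT ends up as a GE compare with the VSEL operands swapped, because under IEEE semantics !(a >= b) is exactly "a < b or unordered".

#include <cassert>
#include <cmath>

static float vselAfterRewrite(float A, float B, float T, float F) {
  return (A >= B) ? F : T; // GE condition, select operands swapped
}

int main() {
  const float Vals[] = {1.0f, 2.0f, NAN};
  for (float A : Vals)
    for (float B : Vals) {
      bool ULT = (A < B) || std::isnan(A) || std::isnan(B); // SETULT semantics
      assert(vselAfterRewrite(A, B, 10.0f, 20.0f) == (ULT ? 10.0f : 20.0f));
    }
  return 0;
}
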
4094
4095SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4096 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4097 SDValue Cmp, SelectionDAG &DAG) const {
4098 if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4099 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4100 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4101 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4102 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4103
4104 SDValue TrueLow = TrueVal.getValue(0);
4105 SDValue TrueHigh = TrueVal.getValue(1);
4106 SDValue FalseLow = FalseVal.getValue(0);
4107 SDValue FalseHigh = FalseVal.getValue(1);
4108
4109 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4110 ARMcc, CCR, Cmp);
4111 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4112 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4113
4114 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4115 } else {
4116 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4117 Cmp);
4118 }
4119}
4120
4121static bool isGTorGE(ISD::CondCode CC) {
4122 return CC == ISD::SETGT || CC == ISD::SETGE;
4123}
4124
4125static bool isLTorLE(ISD::CondCode CC) {
4126 return CC == ISD::SETLT || CC == ISD::SETLE;
4127}
4128
4129// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4130// All of these conditions (and their <= and >= counterparts) will do:
4131// x < k ? k : x
4132// x > k ? x : k
4133// k < x ? x : k
4134// k > x ? k : x
4135static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4136 const SDValue TrueVal, const SDValue FalseVal,
4137 const ISD::CondCode CC, const SDValue K) {
4138 return (isGTorGE(CC) &&
4139 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4140 (isLTorLE(CC) &&
4141 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4142}
4143
4144// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4145static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4146 const SDValue TrueVal, const SDValue FalseVal,
4147 const ISD::CondCode CC, const SDValue K) {
4148 return (isGTorGE(CC) &&
4149 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4150 (isLTorLE(CC) &&
4151 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4152}
4153
4154// Check if two chained conditionals could be converted into SSAT.
4155//
4156// SSAT can replace a set of two conditional selectors that bound a number to an
4157// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4158//
4159// x < -k ? -k : (x > k ? k : x)
4160// x < -k ? -k : (x < k ? x : k)
4161// x > -k ? (x > k ? k : x) : -k
4162// x < k ? (x < -k ? -k : x) : k
4163// etc.
4164//
4165// It returns true if the conversion can be done, false otherwise.
4166// Additionally, the variable is returned in parameter V and the constant in K.
4167static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4168 uint64_t &K) {
4169 SDValue LHS1 = Op.getOperand(0);
4170 SDValue RHS1 = Op.getOperand(1);
4171 SDValue TrueVal1 = Op.getOperand(2);
4172 SDValue FalseVal1 = Op.getOperand(3);
4173 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4174
4175 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4176 if (Op2.getOpcode() != ISD::SELECT_CC)
4177 return false;
4178
4179 SDValue LHS2 = Op2.getOperand(0);
4180 SDValue RHS2 = Op2.getOperand(1);
4181 SDValue TrueVal2 = Op2.getOperand(2);
4182 SDValue FalseVal2 = Op2.getOperand(3);
4183 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4184
4185 // Find out which are the constants and which are the variables
4186 // in each conditional
4187 SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4188 ? &RHS1
4189 : nullptr;
4190 SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4191 ? &RHS2
4192 : nullptr;
4193 SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4194 SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4195 SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4196 SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4197
4198 // We must detect cases where the original operations worked with 16- or
4199 // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
4200 // must work with sign-extended values but the select operations return
4201 // the original non-extended value.
4202 SDValue V2TmpReg = V2Tmp;
4203 if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4204 V2TmpReg = V2Tmp->getOperand(0);
4205
4206 // Check that the registers and the constants have the correct values
4207 // in both conditionals
4208 if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4209 V2TmpReg != V2)
4210 return false;
4211
4212 // Figure out which conditional is saturating the lower/upper bound.
4213 const SDValue *LowerCheckOp =
4214 isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4215 ? &Op
4216 : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4217 ? &Op2
4218 : nullptr;
4219 const SDValue *UpperCheckOp =
4220 isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4221 ? &Op
4222 : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4223 ? &Op2
4224 : nullptr;
4225
4226 if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4227 return false;
4228
4229 // Check that the constant in the lower-bound check is
4230 // the opposite of the constant in the upper-bound check
4231 // in 1's complement.
4232 int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4233 int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4234 int64_t PosVal = std::max(Val1, Val2);
4235
4236 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4237 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4238 Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4239
4240 V = V2;
4241 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4242 return true;
4243 }
4244
4245 return false;
4246}
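
A standalone sketch (not from the LLVM source) of the interval being matched: clamping x into [~k, k], i.e. [-k-1, k], with k+1 a power of two is exactly signed saturation; for example k = 127 clamps to the range of an int8_t.

#include <algorithm>
#include <cassert>
#include <cstdint>

static int32_t clampTo(int32_t X, int32_t K) {
  // min(max(X, ~K), K) == min(max(X, -K-1), K)
  return std::min(std::max(X, ~K), K);
}

int main() {
  assert(clampTo(300, 127) == 127);    // saturates at the upper bound
  assert(clampTo(-300, 127) == -128);  // saturates at the lower bound (~127)
  assert(clampTo(42, 127) == 42);      // in range, unchanged
  return 0;
}
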
4247
4248SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4249 EVT VT = Op.getValueType();
4250 SDLoc dl(Op);
4251
4252 // Try to convert two saturating conditional selects into a single SSAT
4253 SDValue SatValue;
4254 uint64_t SatConstant;
4255 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4256 isSaturatingConditional(Op, SatValue, SatConstant))
4257 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4258 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4259
4260 SDValue LHS = Op.getOperand(0);
4261 SDValue RHS = Op.getOperand(1);
4262 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4263 SDValue TrueVal = Op.getOperand(2);
4264 SDValue FalseVal = Op.getOperand(3);
4265
4266 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4267 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4268 dl);
4269
4270 // If softenSetCCOperands only returned one value, we should compare it to
4271 // zero.
4272 if (!RHS.getNode()) {
4273 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4274 CC = ISD::SETNE;
4275 }
4276 }
4277
4278 if (LHS.getValueType() == MVT::i32) {
4279 // Try to generate VSEL on ARMv8.
4280 // The VSEL instruction can't use all the usual ARM condition
4281 // codes: it only has two bits to select the condition code, so it's
4282 // constrained to use only GE, GT, VS and EQ.
4283 //
4284 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4285 // swap the operands of the previous compare instruction (effectively
4286 // inverting the compare condition, swapping 'less' and 'greater') and
4287 // sometimes need to swap the operands to the VSEL (which inverts the
4288 // condition in the sense of firing whenever the previous condition didn't)
4289 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4290 TrueVal.getValueType() == MVT::f64)) {
4291 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4292 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4293 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4294 CC = ISD::getSetCCInverse(CC, true);
4295 std::swap(TrueVal, FalseVal);
4296 }
4297 }
4298
4299 SDValue ARMcc;
4300 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4301 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4302 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4303 }
4304
4305 ARMCC::CondCodes CondCode, CondCode2;
4306 bool InvalidOnQNaN;
4307 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4308
4309 // Try to generate VMAXNM/VMINNM on ARMv8.
4310 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4311 TrueVal.getValueType() == MVT::f64)) {
4312 bool swpCmpOps = false;
4313 bool swpVselOps = false;
4314 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4315
4316 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4317 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4318 if (swpCmpOps)
4319 std::swap(LHS, RHS);
4320 if (swpVselOps)
4321 std::swap(TrueVal, FalseVal);
4322 }
4323 }
4324
4325 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4326 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4327 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4328 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4329 if (CondCode2 != ARMCC::AL) {
4330 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4331 // FIXME: Needs another CMP because flag can have but one use.
4332 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4333 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4334 }
4335 return Result;
4336}
4337
4338/// canChangeToInt - Given the fp compare operand, return true if it is suitable
4339/// to morph to an integer compare sequence.
4340static bool canChangeToInt(SDValue Op, bool &SeenZero,
4341 const ARMSubtarget *Subtarget) {
4342 SDNode *N = Op.getNode();
4343 if (!N->hasOneUse())
4344 // Otherwise it requires moving the value from fp to integer registers.
4345 return false;
4346 if (!N->getNumValues())
4347 return false;
4348 EVT VT = Op.getValueType();
4349 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4350 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4351 // vmrs are very slow, e.g. cortex-a8.
4352 return false;
4353
4354 if (isFloatingPointZero(Op)) {
4355 SeenZero = true;
4356 return true;
4357 }
4358 return ISD::isNormalLoad(N);
4359}
4360
4361static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4362 if (isFloatingPointZero(Op))
4363 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4364
4365 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4366 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4367 Ld->getPointerInfo(), Ld->getAlignment(),
4368 Ld->getMemOperand()->getFlags());
4369
4370 llvm_unreachable("Unknown VFP cmp argument!")::llvm::llvm_unreachable_internal("Unknown VFP cmp argument!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/ARM/ARMISelLowering.cpp"
, 4370)
;
4371}
4372
4373static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4374 SDValue &RetVal1, SDValue &RetVal2) {
4375 SDLoc dl(Op);
4376
4377 if (isFloatingPointZero(Op)) {
4378 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4379 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4380 return;
4381 }
4382
4383 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4384 SDValue Ptr = Ld->getBasePtr();
4385 RetVal1 =
4386 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4387 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4388
4389 EVT PtrType = Ptr.getValueType();
4390 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4391 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4392 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4393 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4394 Ld->getPointerInfo().getWithOffset(4), NewAlign,
4395 Ld->getMemOperand()->getFlags());
4396 return;
4397 }
4398
4399 llvm_unreachable("Unknown VFP cmp argument!")::llvm::llvm_unreachable_internal("Unknown VFP cmp argument!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn306458/lib/Target/ARM/ARMISelLowering.cpp"
, 4399)
;
4400}
4401
4402/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4403/// f32 and even f64 comparisons to integer ones.
4404SDValue
4405ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4406 SDValue Chain = Op.getOperand(0);
4407 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4408 SDValue LHS = Op.getOperand(2);
4409 SDValue RHS = Op.getOperand(3);
4410 SDValue Dest = Op.getOperand(4);
4411 SDLoc dl(Op);
4412
4413 bool LHSSeenZero = false;
4414 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4415 bool RHSSeenZero = false;
4416 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4417 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4418 // If unsafe fp math optimization is enabled and there are no other uses of
4419 // the CMP operands, and the condition code is EQ or NE, we can optimize it
4420 // to an integer comparison.
4421 if (CC == ISD::SETOEQ)
4422 CC = ISD::SETEQ;
4423 else if (CC == ISD::SETUNE)
4424 CC = ISD::SETNE;
4425
4426 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4427 SDValue ARMcc;
4428 if (LHS.getValueType() == MVT::f32) {
4429 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4430 bitcastf32Toi32(LHS, DAG), Mask);
4431 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4432 bitcastf32Toi32(RHS, DAG), Mask);
4433 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4434 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4435 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4436 Chain, Dest, ARMcc, CCR, Cmp);
4437 }
4438
4439 SDValue LHS1, LHS2;
4440 SDValue RHS1, RHS2;
4441 expandf64Toi32(LHS, DAG, LHS1, LHS2);
4442 expandf64Toi32(RHS, DAG, RHS1, RHS2);
4443 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4444 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4445 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4446 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4447 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4448 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4449 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4450 }
4451
4452 return SDValue();
4453}
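
The masking with 0x7fffffff above works because, for EQ/NE against a floating-point zero, only the magnitude bits matter. A plain C++ sketch of that bit-level test (not from the LLVM source), ignoring NaNs as unsafe-fp-math permits:

#include <cassert>
#include <cstdint>
#include <cstring>

static bool isFPZeroByBits(float X) {
  uint32_t Bits;
  std::memcpy(&Bits, &X, sizeof(Bits));
  return (Bits & 0x7fffffffu) == 0; // clear the sign bit, compare to zero
}

int main() {
  assert(isFPZeroByBits(0.0f));
  assert(isFPZeroByBits(-0.0f));
  assert(!isFPZeroByBits(1.0f));
  return 0;
}
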
4454
4455SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4456 SDValue Chain = Op.getOperand(0);
4457 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4458 SDValue LHS = Op.getOperand(2);
4459 SDValue RHS = Op.getOperand(3);
4460 SDValue Dest = Op.getOperand(4);
4461 SDLoc dl(Op);
4462
4463 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4464 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4465 dl);
4466
4467 // If softenSetCCOperands only returned one value, we should compare it to
4468 // zero.
4469 if (!RHS.getNode()) {
4470 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4471 CC = ISD::SETNE;
4472 }
4473 }
4474
4475 if (LHS.getValueType() == MVT::i32) {
4476 SDValue ARMcc;
4477 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4478 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4479 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4480 Chain, Dest, ARMcc, CCR, Cmp);
4481 }
4482
4483 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4484
4485 if (getTargetMachine().Options.UnsafeFPMath &&
4486 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4487 CC == ISD::SETNE || CC == ISD::SETUNE)) {
4488 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4489 return Result;
4490 }
4491
4492 ARMCC::CondCodes CondCode, CondCode2;
4493 bool InvalidOnQNaN;
4494 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4495
4496 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4497 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4498 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4499 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4500 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4501 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4502 if (CondCode2 != ARMCC::AL) {
4503 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4504 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4505 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4506 }
4507 return Res;
4508}
4509
4510SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4511 SDValue Chain = Op.getOperand(0);
4512 SDValue Table = Op.getOperand(1);
4513 SDValue Index = Op.getOperand(2);
4514 SDLoc dl(Op);
4515
4516 EVT PTy = getPointerTy(DAG.getDataLayout());
4517 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4518 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4519 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4520 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4521 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
4522 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4523 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
4524 // which does another jump to the destination. This also makes it easier
4525 // to translate it to TBB / TBH later (Thumb2 only).
4526 // FIXME: This might not work if the function is extremely large.
4527 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4528 Addr, Op.getOperand(2), JTI);
4529 }
4530 if (isPositionIndependent() || Subtarget->isROPI()) {
4531 Addr =
4532 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4533 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4534 Chain = Addr.getValue(1);
4535 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
4536 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4537 } else {
4538 Addr =
4539 DAG.getLoad(PTy, dl, Chain, Addr,
4540 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4541 Chain = Addr.getValue(1);
4542 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4543 }
4544}
4545
4546static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4547 EVT VT = Op.getValueType();
4548 SDLoc dl(Op);
4549
4550 if (Op.getValueType().getVectorElementType() == MVT::i32) {
4551 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4552 return Op;
4553 return DAG.UnrollVectorOp(Op.getNode());
4554 }
4555
4556 assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
4557 "Invalid type for custom lowering!");
4558 if (VT != MVT::v4i16)
4559 return DAG.UnrollVectorOp(Op.getNode());
4560
4561 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
4562 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4563}
4564
4565SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4566 EVT VT = Op.getValueType();
4567 if (VT.isVector())
4568 return LowerVectorFP_TO_INT(Op, DAG);
4569 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4570 RTLIB::Libcall LC;
4571 if (Op.getOpcode() == ISD::FP_TO_SINT)
4572 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4573 Op.getValueType());
4574 else
4575 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4576 Op.getValueType());
4577 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4578 /*isSigned*/ false, SDLoc(Op)).first;
4579 }
4580
4581 return Op;
4582}
4583
4584static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4585 EVT VT = Op.getValueType();
4586 SDLoc dl(Op);
4587
4588 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4589 if (VT.getVectorElementType() == MVT::f32)
4590 return Op;
4591 return DAG.UnrollVectorOp(Op.getNode());
4592 }
4593
4594 assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
4595 "Invalid type for custom lowering!");
4596 if (VT != MVT::v4f32)
4597 return DAG.UnrollVectorOp(Op.getNode());
4598
4599 unsigned CastOpc;
4600 unsigned Opc;
4601 switch (Op.getOpcode()) {
4602 default: llvm_unreachable("Invalid opcode!");
4603 case ISD::SINT_TO_FP:
4604 CastOpc = ISD::SIGN_EXTEND;
4605 Opc = ISD::SINT_TO_FP;
4606 break;
4607 case ISD::UINT_TO_FP:
4608 CastOpc = ISD::ZERO_EXTEND;
4609 Opc = ISD::UINT_TO_FP;
4610 break;
4611 }
4612
4613 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
4614 return DAG.getNode(Opc, dl, VT, Op);
4615}
4616
4617SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4618 EVT VT = Op.getValueType();
4619 if (VT.isVector())
4620 return LowerVectorINT_TO_FP(Op, DAG);
4621 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4622 RTLIB::Libcall LC;
4623 if (Op.getOpcode() == ISD::SINT_TO_FP)
4624 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4625 Op.getValueType());
4626 else
4627 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4628 Op.getValueType());
4629 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4630 /*isSigned*/ false, SDLoc(Op)).first;
4631 }
4632
4633 return Op;
4634}
4635
4636SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4637 // Implement fcopysign with a fabs and a conditional fneg.
4638 SDValue Tmp0 = Op.getOperand(0);
4639 SDValue Tmp1 = Op.getOperand(1);
4640 SDLoc dl(Op);
4641 EVT VT = Op.getValueType();
4642 EVT SrcVT = Tmp1.getValueType();
4643 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4644 Tmp0.getOpcode() == ARMISD::VMOVDRR;
4645 bool UseNEON = !InGPR && Subtarget->hasNEON();
4646
4647 if (UseNEON) {
4648 // Use VBSL to copy the sign bit.
4649 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4650 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4651 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4652 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4653 if (VT == MVT::f64)
4654 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4655 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4656 DAG.getConstant(32, dl, MVT::i32));
4657 else /*if (VT == MVT::f32)*/
4658 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4659 if (SrcVT == MVT::f32) {
4660 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4661 if (VT == MVT::f64)
4662 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4663 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4664 DAG.getConstant(32, dl, MVT::i32));
4665 } else if (VT == MVT::f32)
4666 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4667 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4668 DAG.getConstant(32, dl, MVT::i32));
4669 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4670 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4671
4672 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4673 dl, MVT::i32);
4674 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4675 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4676 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4677
4678 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4679 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4680 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4681 if (VT == MVT::f32) {
4682 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4683 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4684 DAG.getConstant(0, dl, MVT::i32));
4685 } else {
4686 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4687 }
4688
4689 return Res;
4690 }
4691
4692 // Bitcast operand 1 to i32.
4693 if (SrcVT == MVT::f64)
4694 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4695 Tmp1).getValue(1);
4696 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4697
4698 // Or in the signbit with integer operations.
4699 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4700 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4701 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4702 if (VT == MVT::f32) {
4703 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4704 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4705 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4706 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4707 }
4708
4709 // f64: Or the high part with signbit and then combine two parts.
4710 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4711 Tmp0);
4712 SDValue Lo = Tmp0.getValue(0);
4713 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4714 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4715 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4716}
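
The scalar (non-NEON) path above is just sign-bit masking and OR-ing; the same idea written as plain C++ for f32 (a sketch, not from the LLVM source):

#include <cassert>
#include <cstdint>
#include <cstring>

static float copysignViaBits(float Mag, float Sgn) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, sizeof(M));
  std::memcpy(&S, &Sgn, sizeof(S));
  uint32_t R = (M & 0x7fffffffu) | (S & 0x80000000u); // magnitude | sign
  float Out;
  std::memcpy(&Out, &R, sizeof(Out));
  return Out;
}

int main() {
  assert(copysignViaBits(3.5f, -1.0f) == -3.5f);
  assert(copysignViaBits(-2.0f, 1.0f) == 2.0f);
  return 0;
}
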
4717
4718SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4719 MachineFunction &MF = DAG.getMachineFunction();
4720 MachineFrameInfo &MFI = MF.getFrameInfo();
4721 MFI.setReturnAddressIsTaken(true);
4722
4723 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4724 return SDValue();
4725
4726 EVT VT = Op.getValueType();
4727 SDLoc dl(Op);
4728 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4729 if (Depth) {
4730 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4731 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4732 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4733 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4734 MachinePointerInfo());
4735 }
4736
4737 // Return LR, which contains the return address. Mark it an implicit live-in.
4738 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4739 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4740}
4741
4742SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4743 const ARMBaseRegisterInfo &ARI =
4744 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4745 MachineFunction &MF = DAG.getMachineFunction();
4746 MachineFrameInfo &MFI = MF.getFrameInfo();
4747 MFI.setFrameAddressIsTaken(true);
4748
4749 EVT VT = Op.getValueType();
4750 SDLoc dl(Op); // FIXME probably not meaningful
4751 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4752 unsigned FrameReg = ARI.getFrameRegister(MF);
4753 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4754 while (Depth--)
4755 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4756 MachinePointerInfo());
4757 return FrameAddr;
4758}
4759
4760// FIXME? Maybe this could be a TableGen attribute on some registers and
4761// this table could be generated automatically from RegInfo.
4762unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4763 SelectionDAG &DAG) const {
4764 unsigned Reg = StringSwitch<unsigned>(RegName)
4765 .Case("sp", ARM::SP)
4766 .Default(0);
4767 if (Reg)
4768 return Reg;
4769 report_fatal_error(Twine("Invalid register name \""
4770 + StringRef(RegName) + "\"."));
4771}
4772
4773// Result is 64 bit value so split into two 32 bit values and return as a
4774// pair of values.
4775static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4776 SelectionDAG &DAG) {
4777 SDLoc DL(N);
4778
4779 // This function is only supposed to be called for i64 type destination.
4780 assert(N->getValueType(0) == MVT::i64
4781 && "ExpandREAD_REGISTER called for non-i64 type result.");
4782
4783 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4784 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4785 N->getOperand(0),
4786 N->getOperand(1));
4787
4788 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4789 Read.getValue(1)));
4790 Results.push_back(Read.getOperand(0));
4791}
4792
4793/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
4794/// When \p DstVT, the destination type of \p BC, is on the vector
4795/// register bank and the source of bitcast, \p Op, operates on the same bank,
4796/// it might be possible to combine them, such that everything stays on the
4797/// vector register bank.
4798 /// \returns The node that would replace \p BC, if the combine
4799/// is possible.
4800static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
4801 SelectionDAG &DAG) {
4802 SDValue Op = BC->getOperand(0);
4803 EVT DstVT = BC->getValueType(0);
4804
4805 // The only vector instruction that can produce a scalar (remember,
4806 // since the bitcast was about to be turned into VMOVDRR, the source
4807 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
4808 // Moreover, we can do this combine only if there is one use.
4809 // Finally, if the destination type is not a vector, there is not
4810 // much point on forcing everything on the vector bank.
4811 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4812 !Op.hasOneUse())
4813 return SDValue();
4814
4815 // If the index is not constant, we will introduce an additional
4816 // multiply that will stick.
4817 // Give up in that case.
4818 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4819 if (!Index)
4820 return SDValue();
4821 unsigned DstNumElt = DstVT.getVectorNumElements();
4822
4823 // Compute the new index.
4824 const APInt &APIntIndex = Index->getAPIntValue();
4825 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
4826 NewIndex *= APIntIndex;
4827 // Check if the new constant index fits into i32.
4828 if (NewIndex.getBitWidth() > 32)
4829 return SDValue();
4830
4831 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
4832 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
4833 SDLoc dl(Op);
4834 SDValue ExtractSrc = Op.getOperand(0);
4835 EVT VecVT = EVT::getVectorVT(
4836 *DAG.getContext(), DstVT.getScalarType(),
4837 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
4838 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
4839 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
4840 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
4841}
4842
4843/// ExpandBITCAST - If the target supports VFP, this function is called to
4844/// expand a bit convert where either the source or destination type is i64 to
4845/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4846/// operand type is illegal (e.g., v2f32 for a target that doesn't support
4847/// vectors), since the legalizer won't know what to do with that.
4848static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4849 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4850 SDLoc dl(N);
4851 SDValue Op = N->getOperand(0);
4852
4853 // This function is only supposed to be called for i64 types, either as the
4854 // source or destination of the bit convert.
4855 EVT SrcVT = Op.getValueType();
4856 EVT DstVT = N->getValueType(0);
4857 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4858 "ExpandBITCAST called for non-i64 type");
4859
4860 // Turn i64->f64 into VMOVDRR.
4861 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4862 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
4863 // if we can combine the bitcast with its source.
4864 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
4865 return Val;
4866
4867 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4868 DAG.getConstant(0, dl, MVT::i32));
4869 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4870 DAG.getConstant(1, dl, MVT::i32));
4871 return DAG.getNode(ISD::BITCAST, dl, DstVT,
4872 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4873 }
4874
4875 // Turn f64->i64 into VMOVRRD.
4876 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4877 SDValue Cvt;
4878 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4879 SrcVT.getVectorNumElements() > 1)
4880 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4881 DAG.getVTList(MVT::i32, MVT::i32),
4882 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4883 else
4884 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4885 DAG.getVTList(MVT::i32, MVT::i32), Op);
4886 // Merge the pieces into a single i64 value.
4887 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4888 }
4889
4890 return SDValue();
4891}
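
The i64 <-> two-i32 split that VMOVDRR/VMOVRRD perform maps onto ordinary integer arithmetic; a minimal sketch (not from the LLVM source):

#include <cassert>
#include <cstdint>

static uint64_t buildPair(uint32_t Lo, uint32_t Hi) {
  return ((uint64_t)Hi << 32) | Lo; // BUILD_PAIR: Hi becomes the upper word
}

int main() {
  uint64_t V = 0x0123456789abcdefULL;
  uint32_t Lo = (uint32_t)V;          // EXTRACT_ELEMENT 0
  uint32_t Hi = (uint32_t)(V >> 32);  // EXTRACT_ELEMENT 1
  assert(buildPair(Lo, Hi) == V);
  return 0;
}
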
4892
4893/// getZeroVector - Returns a vector of specified type with all zero elements.
4894/// Zero vectors are used to represent vector negation and in those cases
4895/// will be implemented with the NEON VNEG instruction. However, VNEG does
4896/// not support i64 elements, so sometimes the zero vectors will need to be
4897/// explicitly constructed. Regardless, use a canonical VMOV to create the
4898/// zero vector.
4899static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4900 assert(VT.isVector() && "Expected a vector type");
4901 // The canonical modified immediate encoding of a zero vector is....0!
4902 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4903 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4904 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4905 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4906}
4907
4908/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4909/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4910SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4911 SelectionDAG &DAG) const {
4912 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4913 EVT VT = Op.getValueType();
4914 unsigned VTBits = VT.getSizeInBits();
4915 SDLoc dl(Op);
4916 SDValue ShOpLo = Op.getOperand(0);
4917 SDValue ShOpHi = Op.getOperand(1);
4918 SDValue ShAmt = Op.getOperand(2);
4919 SDValue ARMcc;
4920 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4921 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4922
4923 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4924
4925 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4926 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4927 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4928 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4929 DAG.getConstant(VTBits, dl, MVT::i32));
4930 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4931 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4932 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4933 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4934 ISD::SETGE, ARMcc, DAG, dl);
4935 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
4936 ARMcc, CCR, CmpLo);
4937
4938
4939 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4940 SDValue HiBigShift = Opc == ISD::SRA
4941 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4942 DAG.getConstant(VTBits - 1, dl, VT))
4943 : DAG.getConstant(0, dl, VT);
4944 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4945 ISD::SETGE, ARMcc, DAG, dl);
4946 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4947 ARMcc, CCR, CmpHi);
4948
4949 SDValue Ops[2] = { Lo, Hi };
4950 return DAG.getMergeValues(Ops, dl);
4951}
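
The two CMOVs above select between the "small shift" and "big shift" results for a 64-bit value held in two i32 halves; the same selection written as plain C++ for the SRL case (a sketch, not from the LLVM source):

#include <cassert>
#include <cstdint>

static void lshr64(uint32_t Lo, uint32_t Hi, unsigned Amt,
                   uint32_t &OutLo, uint32_t &OutHi) {
  if (Amt == 0) { OutLo = Lo; OutHi = Hi; return; }
  if (Amt < 32) {                            // small shift: bits flow Hi -> Lo
    OutLo = (Lo >> Amt) | (Hi << (32 - Amt));
    OutHi = Hi >> Amt;
  } else {                                   // big shift: Lo comes from Hi alone
    OutLo = Hi >> (Amt - 32);
    OutHi = 0;
  }
}

int main() {
  uint32_t Lo, Hi;
  lshr64(0x00000000u, 0x00000001u, 4, Lo, Hi);   // 0x0000000100000000 >> 4
  assert(Lo == 0x10000000u && Hi == 0u);
  lshr64(0xdeadbeefu, 0x12345678u, 40, Lo, Hi);  // shift amount >= 32
  assert(Lo == 0x00123456u && Hi == 0u);
  return 0;
}
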
4952
4953/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4954/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4955SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4956 SelectionDAG &DAG) const {
4957 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4958 EVT VT = Op.getValueType();
4959 unsigned VTBits = VT.getSizeInBits();
4960 SDLoc dl(Op);
4961 SDValue ShOpLo = Op.getOperand(0);
4962 SDValue ShOpHi = Op.getOperand(1);
4963 SDValue ShAmt = Op.getOperand(2);
4964 SDValue ARMcc;
4965 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4966
4967 assert(Op.getOpcode() == ISD::SHL_PARTS);
4968 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4969 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4970 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4971 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4972 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4973
4974 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4975 DAG.getConstant(VTBits, dl, MVT::i32));
4976 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4977 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4978 ISD::SETGE, ARMcc, DAG, dl);
4979 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4980 ARMcc, CCR, CmpHi);
4981
4982 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4983 ISD::SETGE, ARMcc, DAG, dl);
4984 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4985 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
4986 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
4987
4988 SDValue Ops[2] = { Lo, Hi };
4989 return DAG.getMergeValues(Ops, dl);
4990}
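// A scalar sketch (illustration only; hypothetical helper, not taken from this
// file) of what the two CMOVs above select between when a 64-bit left shift is
// split into 32-bit halves. The Amt check guards the Amt == 0 case, which would
// otherwise be a shift by 32 and undefined in C++.
static void exampleShl64Parts(unsigned Lo, unsigned Hi, unsigned Amt,
                              unsigned &OutLo, unsigned &OutHi) {
  if (Amt < 32) {                                        // "small shift" path: ExtraShAmt < 0
    OutHi = (Hi << Amt) | (Amt ? (Lo >> (32 - Amt)) : 0);
    OutLo = Lo << Amt;
  } else {                                               // "big shift" path: ExtraShAmt >= 0
    OutHi = Lo << (Amt - 32);
    OutLo = 0;
  }
}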
4991
4992SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4993 SelectionDAG &DAG) const {
4994 // The rounding mode is in bits 23:22 of the FPSCR.
4995 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4996 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
4997 // so that the shift and the AND get folded into a bitfield extract.
4998 SDLoc dl(Op);
4999 SDValue Ops[] = { DAG.getEntryNode(),
5000 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
5001
5002 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
5003 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
5004 DAG.getConstant(1U << 22, dl, MVT::i32));
5005 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
5006 DAG.getConstant(22, dl, MVT::i32));
5007 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
5008 DAG.getConstant(3, dl, MVT::i32));
5009}
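// A standalone sketch (illustration only; hypothetical helper) of the bitfield
// trick used above: adding 1 << 22 rotates the RMode field so that the ARM
// values 0, 1, 2, 3 come out as the FLT_ROUNDS values 1, 2, 3, 0.
static unsigned exampleFltRoundsFromFPSCR(unsigned FPSCRVal) {
  return ((FPSCRVal + (1u << 22)) >> 22) & 3; // e.g. RMode == 1 -> 2, RMode == 3 -> 0
}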
5010
5011static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
5012 const ARMSubtarget *ST) {
5013 SDLoc dl(N);
5014 EVT VT = N->getValueType(0);
5015 if (VT.isVector()) {
5016     assert(ST->hasNEON());
5017
5018 // Compute the least significant set bit: LSB = X & -X
5019 SDValue X = N->getOperand(0);
5020 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
5021 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
5022
5023 EVT ElemTy = VT.getVectorElementType();
5024
5025 if (ElemTy == MVT::i8) {
5026 // Compute with: cttz(x) = ctpop(lsb - 1)
5027 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5028 DAG.getTargetConstant(1, dl, ElemTy));
5029 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5030 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5031 }
5032
5033 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5034 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5035 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5036 unsigned NumBits = ElemTy.getSizeInBits();
5037 SDValue WidthMinus1 =
5038 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5039 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5040 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5041 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5042 }
5043
5044 // Compute with: cttz(x) = ctpop(lsb - 1)
5045
5046 // Since we can only compute the number of bits in a byte with vcnt.8, we
5047 // have to gather the result with pairwise addition (vpaddl) for i16, i32,
5048 // and i64.
5049
5050 // Compute LSB - 1.
5051 SDValue Bits;
5052 if (ElemTy == MVT::i64) {
5053 // Load constant 0xffff'ffff'ffff'ffff to register.
5054 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5055 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5056 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5057 } else {
5058 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5059 DAG.getTargetConstant(1, dl, ElemTy));
5060 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5061 }
5062
5063 // Count #bits with vcnt.8.
5064 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5065 SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
5066 SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
5067
5068 // Gather the #bits with vpaddl (pairwise add.)
5069 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5070 SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
5071 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5072 Cnt8);
5073 if (ElemTy == MVT::i16)
5074 return Cnt16;
5075
5076 EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
5077 SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
5078 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5079 Cnt16);
5080 if (ElemTy == MVT::i32)
5081 return Cnt32;
5082
5083     assert(ElemTy == MVT::i64);
5084 SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5085 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5086 Cnt32);
5087 return Cnt64;
5088 }
5089
5090 if (!ST->hasV6T2Ops())
5091 return SDValue();
5092
5093 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5094 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5095}
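// A scalar sketch (illustration only; hypothetical helper using the GCC/Clang
// __builtin_popcount intrinsic) of the identity the vector lowering above
// relies on: cttz(x) == ctpop((x & -x) - 1).
static unsigned exampleCttzViaPopcount(unsigned X) {
  unsigned LSB = X & -X;              // isolate the least significant set bit
  return __builtin_popcount(LSB - 1); // e.g. X = 40 (0b101000): LSB = 8, popcount(7) = 3
}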
5096
5097/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
5098/// for each 16-bit element from operand, repeated. The basic idea is to
5099/// leverage vcnt to get the 8-bit counts, gather and add the results.
5100///
5101/// Trace for v4i16:
5102/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5103/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
5104/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
5105/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
5106/// [b0 b1 b2 b3 b4 b5 b6 b7]
5107/// +[b1 b0 b3 b2 b5 b4 b7 b6]
5108/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
5109/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
5110static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
5111 EVT VT = N->getValueType(0);
5112 SDLoc DL(N);
5113
5114 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5115 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
5116 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
5117 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
5118 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
5119 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
5120}
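// A scalar sketch (illustration only; hypothetical helper) of the per-lane
// arithmetic the vcnt.8 + vrev16 + vadd sequence above performs: the bit-count
// of a 16-bit lane is simply the sum of the bit-counts of its two bytes.
static unsigned exampleCtpop16(unsigned V) {
  return __builtin_popcount(V & 0xff) + __builtin_popcount((V >> 8) & 0xff);
  // e.g. V = 0x0103: 2 + 1 == 3 == popcount(0x0103)
}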
5121
5122/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
5123/// bit-count for each 16-bit element from the operand. We need slightly
5124/// different sequencing for v4i16 and v8i16 to stay within NEON's available
5125/// 64/128-bit registers.
5126///
5127/// Trace for v4i16:
5128/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5129/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
5130/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
5131/// v4i16:Extracted = [k0 k1 k2 k3 ]
5132static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
5133 EVT VT = N->getValueType(0);
5134 SDLoc DL(N);
5135
5136 SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
5137 if (VT.is64BitVector()) {
5138 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
5139 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
5140 DAG.getIntPtrConstant(0, DL));
5141 } else {
5142 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
5143 BitCounts, DAG.getIntPtrConstant(0, DL));
5144 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
5145 }
5146}
5147
5148/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
5149/// bit-count for each 32-bit element from the operand. The idea here is
5150/// to split the vector into 16-bit elements, leverage the 16-bit count
5151/// routine, and then combine the results.
5152///
5153/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
5154/// input = [v0 v1 ] (vi: 32-bit elements)
5155/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
5156/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
5157/// vrev: N0 = [k1 k0 k3 k2 ]
5158/// [k0 k1 k2 k3 ]
5159/// N1 =+[k1 k0 k3 k2 ]
5160/// [k0 k2 k1 k3 ]
5161/// N2 =+[k1 k3 k0 k2 ]
5162/// [k0 k2 k1 k3 ]
5163/// Extended =+[k1 k3 k0 k2 ]
5164/// [k0 k2 ]
5165/// Extracted=+[k1 k3 ]
5166///
5167static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
5168 EVT VT = N->getValueType(0);
5169 SDLoc DL(N);
5170
5171 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5172
5173 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
5174 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
5175 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
5176 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
5177 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
5178
5179 if (VT.is64BitVector()) {
5180 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
5181 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
5182 DAG.getIntPtrConstant(0, DL));
5183 } else {
5184 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
5185 DAG.getIntPtrConstant(0, DL));
5186 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
5187 }
5188}
5189
5190static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5191 const ARMSubtarget *ST) {
5192 EVT VT = N->getValueType(0);
5193
5194   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5195   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
5196           VT == MVT::v4i16 || VT == MVT::v8i16) &&
5197          "Unexpected type for custom ctpop lowering");
5198
5199 if (VT.getVectorElementType() == MVT::i32)
5200 return lowerCTPOP32BitElements(N, DAG);
5201 else
5202 return lowerCTPOP16BitElements(N, DAG);
5203}
5204
5205static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5206 const ARMSubtarget *ST) {
5207 EVT VT = N->getValueType(0);
5208 SDLoc dl(N);
5209
5210 if (!VT.isVector())
5211 return SDValue();
5212
5213 // Lower vector shifts on NEON to use VSHL.
5214   assert(ST->hasNEON() && "unexpected vector shift");
5215
5216 // Left shifts translate directly to the vshiftu intrinsic.
5217 if (N->getOpcode() == ISD::SHL)
5218 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5219 DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5220 MVT::i32),
5221 N->getOperand(0), N->getOperand(1));
5222
5223   assert((N->getOpcode() == ISD::SRA ||
5224           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5225
5226 // NEON uses the same intrinsics for both left and right shifts. For
5227 // right shifts, the shift amounts are negative, so negate the vector of
5228 // shift amounts.
5229 EVT ShiftVT = N->getOperand(1).getValueType();
5230 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5231 getZeroVector(ShiftVT, DAG, dl),
5232 N->getOperand(1));
5233 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5234 Intrinsic::arm_neon_vshifts :
5235 Intrinsic::arm_neon_vshiftu);
5236 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5237 DAG.getConstant(vshiftInt, dl, MVT::i32),
5238 N->getOperand(0), NegatedCount);
5239}
5240
5241static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5242 const ARMSubtarget *ST) {
5243 EVT VT = N->getValueType(0);
5244 SDLoc dl(N);
5245
5246 // We can get here for a node like i32 = ISD::SHL i32, i64
5247 if (VT != MVT::i64)
5248 return SDValue();
5249
5250   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5251          "Unknown shift to lower!");
5252
5253 // We only lower SRA, SRL of 1 here, all others use generic lowering.
5254 if (!isOneConstant(N->getOperand(1)))
5255 return SDValue();
5256
5257 // If we are in thumb mode, we don't have RRX.
5258 if (ST->isThumb1Only()) return SDValue();
5259
5260 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5261 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5262 DAG.getConstant(0, dl, MVT::i32));
5263 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5264 DAG.getConstant(1, dl, MVT::i32));
5265
5266 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5267 // captures the result into a carry flag.
5268 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5269 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5270
5271 // The low part is an ARMISD::RRX operand, which shifts the carry in.
5272 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5273
5274 // Merge the pieces into a single i64 value.
5275 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5276}
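// A scalar sketch (illustration only; hypothetical helper) of the SRL_FLAG +
// RRX expansion above for a 64-bit logical shift right by one: the bit that
// falls off the high word becomes the carry that RRX rotates into bit 31 of
// the low word.
static unsigned long long exampleSrl64By1(unsigned Lo, unsigned Hi) {
  unsigned Carry = Hi & 1;                    // captured by SRL_FLAG
  unsigned NewHi = Hi >> 1;
  unsigned NewLo = (Lo >> 1) | (Carry << 31); // RRX shifts the carry in on top
  return ((unsigned long long)NewHi << 32) | NewLo;
}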
5277
5278static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5279 SDValue TmpOp0, TmpOp1;
5280 bool Invert = false;
5281 bool Swap = false;
5282 unsigned Opc = 0;
5283
5284 SDValue Op0 = Op.getOperand(0);
5285 SDValue Op1 = Op.getOperand(1);
5286 SDValue CC = Op.getOperand(2);
5287 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5288 EVT VT = Op.getValueType();
5289 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5290 SDLoc dl(Op);
5291
5292 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5293 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5294 // Special-case integer 64-bit equality comparisons. They aren't legal,
5295 // but they can be lowered with a few vector instructions.
5296 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5297 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5298 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5299 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5300 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5301 DAG.getCondCode(ISD::SETEQ));
5302 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5303 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5304 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5305 if (SetCCOpcode == ISD::SETNE)
5306 Merged = DAG.getNOT(dl, Merged, CmpVT);
5307 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5308 return Merged;
5309 }
5310
5311 if (CmpVT.getVectorElementType() == MVT::i64)
5312 // 64-bit comparisons are not legal in general.
5313 return SDValue();
5314
5315 if (Op1.getValueType().isFloatingPoint()) {
5316 switch (SetCCOpcode) {
5317     default: llvm_unreachable("Illegal FP comparison");
5318 case ISD::SETUNE:
5319     case ISD::SETNE:  Invert = true; LLVM_FALLTHROUGH;
5320 case ISD::SETOEQ:
5321 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5322 case ISD::SETOLT:
5323     case ISD::SETLT:  Swap = true; LLVM_FALLTHROUGH;
5324 case ISD::SETOGT:
5325 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5326 case ISD::SETOLE:
5327     case ISD::SETLE:  Swap = true; LLVM_FALLTHROUGH;
5328 case ISD::SETOGE:
5329 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5330     case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5331 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5332     case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5333 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5334     case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5335 case ISD::SETONE:
5336 // Expand this to (OLT | OGT).
5337 TmpOp0 = Op0;
5338 TmpOp1 = Op1;
5339 Opc = ISD::OR;
5340 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5341 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5342 break;
5343 case ISD::SETUO:
5344 Invert = true;
5345       LLVM_FALLTHROUGH;
5346 case ISD::SETO:
5347 // Expand this to (OLT | OGE).
5348 TmpOp0 = Op0;
5349 TmpOp1 = Op1;
5350 Opc = ISD::OR;
5351 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5352 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5353 break;
5354 }
5355 } else {
5356 // Integer comparisons.
5357 switch (SetCCOpcode) {
5358     default: llvm_unreachable("Illegal integer comparison");
5359 case ISD::SETNE: Invert = true;
5360 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5361 case ISD::SETLT: Swap = true;
5362 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5363 case ISD::SETLE: Swap = true;
5364 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5365 case ISD::SETULT: Swap = true;
5366 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5367 case ISD::SETULE: Swap = true;
5368 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5369 }
5370
5371 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5372 if (Opc == ARMISD::VCEQ) {
5373
5374 SDValue AndOp;
5375 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5376 AndOp = Op0;
5377 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5378 AndOp = Op1;
5379
5380 // Ignore bitconvert.
5381 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5382 AndOp = AndOp.getOperand(0);
5383
5384 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5385 Opc = ARMISD::VTST;
5386 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5387 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5388 Invert = !Invert;
5389 }
5390 }
5391 }
5392
5393 if (Swap)
5394 std::swap(Op0, Op1);
5395
5396 // If one of the operands is a constant vector zero, attempt to fold the
5397 // comparison to a specialized compare-against-zero form.
5398 SDValue SingleOp;
5399 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5400 SingleOp = Op0;
5401 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5402 if (Opc == ARMISD::VCGE)
5403 Opc = ARMISD::VCLEZ;
5404 else if (Opc == ARMISD::VCGT)
5405 Opc = ARMISD::VCLTZ;
5406 SingleOp = Op1;
5407 }
5408
5409 SDValue Result;
5410 if (SingleOp.getNode()) {
5411 switch (Opc) {
5412 case ARMISD::VCEQ:
5413 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5414 case ARMISD::VCGE:
5415 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5416 case ARMISD::VCLEZ:
5417 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5418 case ARMISD::VCGT:
5419 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5420 case ARMISD::VCLTZ:
5421 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5422 default:
5423 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5424 }
5425 } else {
5426 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5427 }
5428
5429 Result = DAG.getSExtOrTrunc(Result, dl, VT);
5430
5431 if (Invert)
5432 Result = DAG.getNOT(dl, Result, VT);
5433
5434 return Result;
5435}
5436
5437static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
5438 SDValue LHS = Op.getOperand(0);
5439 SDValue RHS = Op.getOperand(1);
5440 SDValue Carry = Op.getOperand(2);
5441 SDValue Cond = Op.getOperand(3);
5442 SDLoc DL(Op);
5443
5444   assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
5445
5446   assert(Carry.getOpcode() != ISD::CARRY_FALSE);
5447 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5448 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5449
5450 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5451 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5452 SDValue ARMcc = DAG.getConstant(
5453 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5454 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5455 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5456 Cmp.getValue(1), SDValue());
5457 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5458 CCR, Chain.getValue(1));
5459}
5460
5461/// isNEONModifiedImm - Check if the specified splat value corresponds to a
5462/// valid vector constant for a NEON instruction with a "modified immediate"
5463/// operand (e.g., VMOV). If so, return the encoded value.
5464static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5465 unsigned SplatBitSize, SelectionDAG &DAG,
5466 const SDLoc &dl, EVT &VT, bool is128Bits,
5467 NEONModImmType type) {
5468 unsigned OpCmode, Imm;
5469
5470 // SplatBitSize is set to the smallest size that splats the vector, so a
5471 // zero vector will always have SplatBitSize == 8. However, NEON modified
5472 // immediate instructions other than VMOV do not support the 8-bit encoding
5473 // of a zero vector, and the default encoding of zero is supposed to be the
5474 // 32-bit version.
5475 if (SplatBits == 0)
5476 SplatBitSize = 32;
5477
5478 switch (SplatBitSize) {
5479 case 8:
5480 if (type != VMOVModImm)
5481 return SDValue();
5482 // Any 1-byte value is OK. Op=0, Cmode=1110.
5483     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5484 OpCmode = 0xe;
5485 Imm = SplatBits;
5486 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5487 break;
5488
5489 case 16:
5490 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5491 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5492 if ((SplatBits & ~0xff) == 0) {
5493 // Value = 0x00nn: Op=x, Cmode=100x.
5494 OpCmode = 0x8;
5495 Imm = SplatBits;
5496 break;
5497 }
5498 if ((SplatBits & ~0xff00) == 0) {
5499 // Value = 0xnn00: Op=x, Cmode=101x.
5500 OpCmode = 0xa;
5501 Imm = SplatBits >> 8;
5502 break;
5503 }
5504 return SDValue();
5505
5506 case 32:
5507 // NEON's 32-bit VMOV supports splat values where:
5508 // * only one byte is nonzero, or
5509 // * the least significant byte is 0xff and the second byte is nonzero, or
5510 // * the least significant 2 bytes are 0xff and the third is nonzero.
5511 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5512 if ((SplatBits & ~0xff) == 0) {
5513 // Value = 0x000000nn: Op=x, Cmode=000x.
5514 OpCmode = 0;
5515 Imm = SplatBits;
5516 break;
5517 }
5518 if ((SplatBits & ~0xff00) == 0) {
5519 // Value = 0x0000nn00: Op=x, Cmode=001x.
5520 OpCmode = 0x2;
5521 Imm = SplatBits >> 8;
5522 break;
5523 }
5524 if ((SplatBits & ~0xff0000) == 0) {
5525 // Value = 0x00nn0000: Op=x, Cmode=010x.
5526 OpCmode = 0x4;
5527 Imm = SplatBits >> 16;
5528 break;
5529 }
5530 if ((SplatBits & ~0xff000000) == 0) {
5531 // Value = 0xnn000000: Op=x, Cmode=011x.
5532 OpCmode = 0x6;
5533 Imm = SplatBits >> 24;
5534 break;
5535 }
5536
5537 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5538 if (type == OtherModImm) return SDValue();
5539
5540 if ((SplatBits & ~0xffff) == 0 &&
5541 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5542 // Value = 0x0000nnff: Op=x, Cmode=1100.
5543 OpCmode = 0xc;
5544 Imm = SplatBits >> 8;
5545 break;
5546 }
5547
5548 if ((SplatBits & ~0xffffff) == 0 &&
5549 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5550 // Value = 0x00nnffff: Op=x, Cmode=1101.
5551 OpCmode = 0xd;
5552 Imm = SplatBits >> 16;
5553 break;
5554 }
5555
5556 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5557 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5558 // VMOV.I32. A (very) minor optimization would be to replicate the value
5559 // and fall through here to test for a valid 64-bit splat. But, then the
5560 // caller would also need to check and handle the change in size.
5561 return SDValue();
5562
5563 case 64: {
5564 if (type != VMOVModImm)
5565 return SDValue();
5566 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5567 uint64_t BitMask = 0xff;
5568 uint64_t Val = 0;
5569 unsigned ImmMask = 1;
5570 Imm = 0;
5571 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5572 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5573 Val |= BitMask;
5574 Imm |= ImmMask;
5575 } else if ((SplatBits & BitMask) != 0) {
5576 return SDValue();
5577 }
5578 BitMask <<= 8;
5579 ImmMask <<= 1;
5580 }
5581
5582 if (DAG.getDataLayout().isBigEndian())
5583 // swap higher and lower 32 bit word
5584 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5585
5586 // Op=1, Cmode=1110.
5587 OpCmode = 0x1e;
5588 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5589 break;
5590 }
5591
5592 default:
5593     llvm_unreachable("unexpected size for isNEONModifiedImm");
5594 }
5595
5596 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5597 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5598}
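// Illustration of the encoded constants this helper produces (a sketch that
// assumes ARM_AM::createNEONModImm packs OpCmode into bits 12:8 and Imm into
// bits 7:0, which the 0x1eff all-ones constant used in LowerCTTZ above is
// consistent with):
//   * v4i16 splat of 0x2a00 hits the "Value = 0xnn00" case, so OpCmode = 0xa,
//     Imm = 0x2a and the encoded value is 0x0a2a;
//   * an all-ones 64-bit splat gives OpCmode = 0x1e, Imm = 0xff, i.e. 0x1eff.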
5599
5600SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5601 const ARMSubtarget *ST) const {
5602 bool IsDouble = Op.getValueType() == MVT::f64;
5603 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5604 const APFloat &FPVal = CFP->getValueAPF();
5605
5606 // Prevent floating-point constants from using literal loads
5607 // when execute-only is enabled.
5608 if (ST->genExecuteOnly()) {
5609 APInt INTVal = FPVal.bitcastToAPInt();
5610 SDLoc DL(CFP);
5611 if (IsDouble) {
5612 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5613 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5614 if (!ST->isLittle())
5615 std::swap(Lo, Hi);
5616 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5617 } else {
5618 return DAG.getConstant(INTVal, DL, MVT::i32);
5619 }
5620 }
5621
5622 if (!ST->hasVFP3())
5623 return SDValue();
5624
5625 // Use the default (constant pool) lowering for double constants when we have
5626 // an SP-only FPU
5627 if (IsDouble && Subtarget->isFPOnlySP())
5628 return SDValue();
5629
5630 // Try splatting with a VMOV.f32...
5631 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5632
5633 if (ImmVal != -1) {
5634 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5635 // We have code in place to select a valid ConstantFP already, no need to
5636 // do any mangling.
5637 return Op;
5638 }
5639
5640 // It's a float and we are trying to use NEON operations where
5641 // possible. Lower it to a splat followed by an extract.
5642 SDLoc DL(Op);
5643 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5644 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5645 NewVal);
5646 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5647 DAG.getConstant(0, DL, MVT::i32));
5648 }
5649
5650 // The rest of our options are NEON only, make sure that's allowed before
5651 // proceeding..
5652 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5653 return SDValue();
5654
5655 EVT VMovVT;
5656 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5657
5658 // It wouldn't really be worth bothering for doubles except for one very
5659 // important value, which does happen to match: 0.0. So make sure we don't do
5660 // anything stupid.
5661 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5662 return SDValue();
5663
5664 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5665 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5666 VMovVT, false, VMOVModImm);
5667 if (NewVal != SDValue()) {
5668 SDLoc DL(Op);
5669 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5670 NewVal);
5671 if (IsDouble)
5672 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5673
5674 // It's a float: cast and extract a vector element.
5675 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5676 VecConstant);
5677 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5678 DAG.getConstant(0, DL, MVT::i32));
5679 }
5680
5681 // Finally, try a VMVN.i32
5682 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5683 false, VMVNModImm);
5684 if (NewVal != SDValue()) {
5685 SDLoc DL(Op);
5686 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5687
5688 if (IsDouble)
5689 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5690
5691 // It's a float: cast and extract a vector element.
5692 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5693 VecConstant);
5694 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5695 DAG.getConstant(0, DL, MVT::i32));
5696 }
5697
5698 return SDValue();
5699}
5700
5701 // Check if a VEXT instruction can handle the shuffle mask when the
5702// vector sources of the shuffle are the same.
5703static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5704 unsigned NumElts = VT.getVectorNumElements();
5705
5706 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5707 if (M[0] < 0)
5708 return false;
5709
5710 Imm = M[0];
5711
5712 // If this is a VEXT shuffle, the immediate value is the index of the first
5713 // element. The other shuffle indices must be the successive elements after
5714 // the first one.
5715 unsigned ExpectedElt = Imm;
5716 for (unsigned i = 1; i < NumElts; ++i) {
5717 // Increment the expected index. If it wraps around, just follow it
5718 // back to index zero and keep going.
5719 ++ExpectedElt;
5720 if (ExpectedElt == NumElts)
5721 ExpectedElt = 0;
5722
5723 if (M[i] < 0) continue; // ignore UNDEF indices
5724 if (ExpectedElt != static_cast<unsigned>(M[i]))
5725 return false;
5726 }
5727
5728 return true;
5729}
5730
5731static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5732 bool &ReverseVEXT, unsigned &Imm) {
5733 unsigned NumElts = VT.getVectorNumElements();
5734 ReverseVEXT = false;
5735
5736 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5737 if (M[0] < 0)
5738 return false;
5739
5740 Imm = M[0];
5741
5742 // If this is a VEXT shuffle, the immediate value is the index of the first
5743 // element. The other shuffle indices must be the successive elements after
5744 // the first one.
5745 unsigned ExpectedElt = Imm;
5746 for (unsigned i = 1; i < NumElts; ++i) {
5747 // Increment the expected index. If it wraps around, it may still be
5748 // a VEXT but the source vectors must be swapped.
5749 ExpectedElt += 1;
5750 if (ExpectedElt == NumElts * 2) {
5751 ExpectedElt = 0;
5752 ReverseVEXT = true;
5753 }
5754
5755 if (M[i] < 0) continue; // ignore UNDEF indices
5756 if (ExpectedElt != static_cast<unsigned>(M[i]))
5757 return false;
5758 }
5759
5760 // Adjust the index value if the source operands will be swapped.
5761 if (ReverseVEXT)
5762 Imm -= NumElts;
5763
5764 return true;
5765}
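// Illustrative masks (examples added for clarity, using a v8i8 shuffle of
// v1 and v2) that this predicate accepts:
//   [3, 4, 5, 6, 7, 8, 9, 10]   -> VEXT v1, v2, #3  (Imm = 3, ReverseVEXT = false)
//   [13, 14, 15, 0, 1, 2, 3, 4] -> VEXT v2, v1, #5  (the walk wraps past 2*NumElts,
//                                  so ReverseVEXT = true and Imm becomes 13 - 8 = 5)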
5766
5767/// isVREVMask - Check if a vector shuffle corresponds to a VREV
5768/// instruction with the specified blocksize. (The order of the elements
5769/// within each block of the vector is reversed.)
5770static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5771   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5772          "Only possible block sizes for VREV are: 16, 32, 64");
5773
5774 unsigned EltSz = VT.getScalarSizeInBits();
5775 if (EltSz == 64)
5776 return false;
5777
5778 unsigned NumElts = VT.getVectorNumElements();
5779 unsigned BlockElts = M[0] + 1;
5780 // If the first shuffle index is UNDEF, be optimistic.
5781 if (M[0] < 0)
5782 BlockElts = BlockSize / EltSz;
5783
5784 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5785 return false;
5786
5787 for (unsigned i = 0; i < NumElts; ++i) {
5788 if (M[i] < 0) continue; // ignore UNDEF indices
5789 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5790 return false;
5791 }
5792
5793 return true;
5794}
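// Illustrative masks (examples added for clarity) that this predicate accepts:
//   v8i8,  BlockSize = 32: [3, 2, 1, 0, 7, 6, 5, 4]  (VREV32.8)
//   v4i16, BlockSize = 64: [3, 2, 1, 0]              (VREV64.16)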
5795
5796static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5797 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5798 // range, then 0 is placed into the resulting vector. So pretty much any mask
5799 // of 8 elements can work here.
5800 return VT == MVT::v8i8 && M.size() == 8;
5801}
5802
5803// Checks whether the shuffle mask represents a vector transpose (VTRN) by
5804// checking that pairs of elements in the shuffle mask represent the same index
5805// in each vector, incrementing the expected index by 2 at each step.
5806// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
5807// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
5808// v2={e,f,g,h}
5809// WhichResult gives the offset for each element in the mask based on which
5810// of the two results it belongs to.
5811//
5812// The transpose can be represented either as:
5813// result1 = shufflevector v1, v2, result1_shuffle_mask
5814// result2 = shufflevector v1, v2, result2_shuffle_mask
5815// where v1/v2 and the shuffle masks have the same number of elements
5816// (here WhichResult (see below) indicates which result is being checked)
5817//
5818// or as:
5819// results = shufflevector v1, v2, shuffle_mask
5820// where both results are returned in one vector and the shuffle mask has twice
5821 // as many elements as v1/v2 (here WhichResult will always be 0 if true). Here we
5822// want to check the low half and high half of the shuffle mask as if it were
5823// the other case
5824static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5825 unsigned EltSz = VT.getScalarSizeInBits();
5826 if (EltSz == 64)
5827 return false;
5828
5829 unsigned NumElts = VT.getVectorNumElements();
5830 if (M.size() != NumElts && M.size() != NumElts*2)
5831 return false;
5832
5833 // If the mask is twice as long as the input vector then we need to check the
5834 // upper and lower parts of the mask with a matching value for WhichResult
5835 // FIXME: A mask with only even values will be rejected in case the first
5836 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
5837 // M[0] is used to determine WhichResult
5838 for (unsigned i = 0; i < M.size(); i += NumElts) {
5839 if (M.size() == NumElts * 2)
5840 WhichResult = i / NumElts;
5841 else
5842 WhichResult = M[i] == 0 ? 0 : 1;
5843 for (unsigned j = 0; j < NumElts; j += 2) {
5844 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5845 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
5846 return false;
5847 }
5848 }
5849
5850 if (M.size() == NumElts*2)
5851 WhichResult = 0;
5852
5853 return true;
5854}
5855
5856/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5857/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5858/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5859static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5860 unsigned EltSz = VT.getScalarSizeInBits();
5861 if (EltSz == 64)
5862 return false;
5863
5864 unsigned NumElts = VT.getVectorNumElements();
5865 if (M.size() != NumElts && M.size() != NumElts*2)
5866 return false;
5867
5868 for (unsigned i = 0; i < M.size(); i += NumElts) {
5869 if (M.size() == NumElts * 2)
5870 WhichResult = i / NumElts;
5871 else
5872 WhichResult = M[i] == 0 ? 0 : 1;
5873 for (unsigned j = 0; j < NumElts; j += 2) {
5874 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5875 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
5876 return false;
5877 }
5878 }
5879
5880 if (M.size() == NumElts*2)
5881 WhichResult = 0;
5882
5883 return true;
5884}
5885
5886// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
5887// that the mask elements are either all even and in steps of size 2 or all odd
5888// and in steps of size 2.
5889// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
5890// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
5891// v2={e,f,g,h}
5892 // Requires checks similar to those of isVTRNMask with respect to how the
5893 // results are returned.
5894static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5895 unsigned EltSz = VT.getScalarSizeInBits();
5896 if (EltSz == 64)
5897 return false;
5898
5899 unsigned NumElts = VT.getVectorNumElements();
5900 if (M.size() != NumElts && M.size() != NumElts*2)
5901 return false;
5902
5903 for (unsigned i = 0; i < M.size(); i += NumElts) {
5904 WhichResult = M[i] == 0 ? 0 : 1;
5905 for (unsigned j = 0; j < NumElts; ++j) {
5906 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
5907 return false;
5908 }
5909 }
5910
5911 if (M.size() == NumElts*2)
5912 WhichResult = 0;
5913
5914 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5915 if (VT.is64BitVector() && EltSz == 32)
5916 return false;
5917
5918 return true;
5919}
5920
5921/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5922/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5923 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5924static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5925 unsigned EltSz = VT.getScalarSizeInBits();
5926 if (EltSz == 64)
5927 return false;
5928
5929 unsigned NumElts = VT.getVectorNumElements();
5930 if (M.size() != NumElts && M.size() != NumElts*2)
5931 return false;
5932
5933 unsigned Half = NumElts / 2;
5934 for (unsigned i = 0; i < M.size(); i += NumElts) {
5935 WhichResult = M[i] == 0 ? 0 : 1;
5936 for (unsigned j = 0; j < NumElts; j += Half) {
5937 unsigned Idx = WhichResult;
5938 for (unsigned k = 0; k < Half; ++k) {
5939 int MIdx = M[i + j + k];
5940 if (MIdx >= 0 && (unsigned) MIdx != Idx)
5941 return false;
5942 Idx += 2;
5943 }
5944 }
5945 }
5946
5947 if (M.size() == NumElts*2)
5948 WhichResult = 0;
5949
5950 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5951 if (VT.is64BitVector() && EltSz == 32)
5952 return false;
5953
5954 return true;
5955}
5956
5957// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
5958// that pairs of elements of the shufflemask represent the same index in each
5959// vector incrementing sequentially through the vectors.
5960// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
5961// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
5962// v2={e,f,g,h}
5963 // Requires checks similar to those of isVTRNMask with respect to how the
5964 // results are returned.
5965static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5966 unsigned EltSz = VT.getScalarSizeInBits();
5967 if (EltSz == 64)
5968 return false;
5969
5970 unsigned NumElts = VT.getVectorNumElements();
5971 if (M.size() != NumElts && M.size() != NumElts*2)
5972 return false;
5973
5974 for (unsigned i = 0; i < M.size(); i += NumElts) {
5975 WhichResult = M[i] == 0 ? 0 : 1;
5976 unsigned Idx = WhichResult * NumElts / 2;
5977 for (unsigned j = 0; j < NumElts; j += 2) {
5978 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5979 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
5980 return false;
5981 Idx += 1;
5982 }
5983 }
5984
5985 if (M.size() == NumElts*2)
5986 WhichResult = 0;
5987
5988 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5989 if (VT.is64BitVector() && EltSz == 32)
5990 return false;
5991
5992 return true;
5993}
5994
5995/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5996/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5997/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5998static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5999 unsigned EltSz = VT.getScalarSizeInBits();
6000 if (EltSz == 64)
6001 return false;
6002
6003 unsigned NumElts = VT.getVectorNumElements();
6004 if (M.size() != NumElts && M.size() != NumElts*2)
6005 return false;
6006
6007 for (unsigned i = 0; i < M.size(); i += NumElts) {
6008 WhichResult = M[i] == 0 ? 0 : 1;
6009 unsigned Idx = WhichResult * NumElts / 2;
6010 for (unsigned j = 0; j < NumElts; j += 2) {
6011 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6012 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
6013 return false;
6014 Idx += 1;
6015 }
6016 }
6017
6018 if (M.size() == NumElts*2)
6019 WhichResult = 0;
6020
6021 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6022 if (VT.is64BitVector() && EltSz == 32)
6023 return false;
6024
6025 return true;
6026}
6027
6028/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
6029/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
6030static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
6031 unsigned &WhichResult,
6032 bool &isV_UNDEF) {
6033 isV_UNDEF = false;
6034 if (isVTRNMask(ShuffleMask, VT, WhichResult))
6035 return ARMISD::VTRN;
6036 if (isVUZPMask(ShuffleMask, VT, WhichResult))
6037 return ARMISD::VUZP;
6038 if (isVZIPMask(ShuffleMask, VT, WhichResult))
6039 return ARMISD::VZIP;
6040
6041 isV_UNDEF = true;
6042 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6043 return ARMISD::VTRN;
6044 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6045 return ARMISD::VUZP;
6046 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6047 return ARMISD::VZIP;
6048
6049 return 0;
6050}
6051
6052 /// \return true if this is a reverse operation on a vector.
6053static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6054 unsigned NumElts = VT.getVectorNumElements();
6055 // Make sure the mask has the right size.
6056 if (NumElts != M.size())
6057 return false;
6058
6059 // Look for <15, ..., 3, -1, 1, 0>.
6060 for (unsigned i = 0; i != NumElts; ++i)
6061 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6062 return false;
6063
6064 return true;
6065}
6066
6067// If N is an integer constant that can be moved into a register in one
6068// instruction, return an SDValue of such a constant (will become a MOV
6069// instruction). Otherwise return null.
6070static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6071 const ARMSubtarget *ST, const SDLoc &dl) {
6072 uint64_t Val;
6073 if (!isa<ConstantSDNode>(N))
6074 return SDValue();
6075 Val = cast<ConstantSDNode>(N)->getZExtValue();
6076
6077 if (ST->isThumb1Only()) {
6078 if (Val <= 255 || ~Val <= 255)
6079 return DAG.getConstant(Val, dl, MVT::i32);
6080 } else {
6081 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6082 return DAG.getConstant(Val, dl, MVT::i32);
6083 }
6084 return SDValue();
6085}
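// Illustrative values (examples added for clarity; the ARM-mode check assumes
// the usual "8-bit value rotated by an even amount" rule that
// ARM_AM::getSOImmVal implements):
//   0x000000ff -> single MOV on both Thumb1 and ARM
//   0x00ab0000 -> single MOV on ARM (0xab rotated into bits 23:16)
//   0xffffff00 -> accepted because ~Val fits in 8 bits (materializable with MVN on ARM)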
6086
6087// If this is a case we can't handle, return null and let the default
6088// expansion code take care of it.
6089SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6090 const ARMSubtarget *ST) const {
6091 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6092 SDLoc dl(Op);
6093 EVT VT = Op.getValueType();
6094
6095 APInt SplatBits, SplatUndef;
6096 unsigned SplatBitSize;
6097 bool HasAnyUndefs;
6098 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6099 if (SplatUndef.isAllOnesValue())
6100 return DAG.getUNDEF(VT);
6101
6102 if (SplatBitSize <= 64) {
6103 // Check if an immediate VMOV works.
6104 EVT VmovVT;
6105 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6106 SplatUndef.getZExtValue(), SplatBitSize,
6107 DAG, dl, VmovVT, VT.is128BitVector(),
6108 VMOVModImm);
6109 if (Val.getNode()) {
6110 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6111 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6112 }
6113
6114 // Try an immediate VMVN.
6115 uint64_t NegatedImm = (~SplatBits).getZExtValue();
6116 Val = isNEONModifiedImm(NegatedImm,
6117 SplatUndef.getZExtValue(), SplatBitSize,
6118 DAG, dl, VmovVT, VT.is128BitVector(),
6119 VMVNModImm);
6120 if (Val.getNode()) {
6121 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6122 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6123 }
6124
6125 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6126 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6127 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6128 if (ImmVal != -1) {
6129 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6130 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6131 }
6132 }
6133 }
6134 }
6135
6136 // Scan through the operands to see if only one value is used.
6137 //
6138 // As an optimisation, even if more than one value is used it may be more
6139 // profitable to splat with one value then change some lanes.
6140 //
6141 // Heuristically we decide to do this if the vector has a "dominant" value,
6142 // defined as splatted to more than half of the lanes.
6143 unsigned NumElts = VT.getVectorNumElements();
6144 bool isOnlyLowElement = true;
6145 bool usesOnlyOneValue = true;
6146 bool hasDominantValue = false;
6147 bool isConstant = true;
6148
6149 // Map of the number of times a particular SDValue appears in the
6150 // element list.
6151 DenseMap<SDValue, unsigned> ValueCounts;
6152 SDValue Value;
6153 for (unsigned i = 0; i < NumElts; ++i) {
6154 SDValue V = Op.getOperand(i);
6155 if (V.isUndef())
6156 continue;
6157 if (i > 0)
6158 isOnlyLowElement = false;
6159 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6160 isConstant = false;
6161
6162 ValueCounts.insert(std::make_pair(V, 0));
6163 unsigned &Count = ValueCounts[V];
6164
6165 // Is this value dominant? (takes up more than half of the lanes)
6166 if (++Count > (NumElts / 2)) {
6167 hasDominantValue = true;
6168 Value = V;
6169 }
6170 }
6171 if (ValueCounts.size() != 1)
6172 usesOnlyOneValue = false;
6173 if (!Value.getNode() && !ValueCounts.empty())
6174 Value = ValueCounts.begin()->first;
6175
6176 if (ValueCounts.empty())
6177 return DAG.getUNDEF(VT);
6178
6179 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6180 // Keep going if we are hitting this case.
6181 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6182 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6183
6184 unsigned EltSize = VT.getScalarSizeInBits();
6185
6186 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6187 // i32 and try again.
6188 if (hasDominantValue && EltSize <= 32) {
6189 if (!isConstant) {
6190 SDValue N;
6191
6192 // If we are VDUPing a value that comes directly from a vector, that will
6193 // cause an unnecessary move to and from a GPR, where instead we could
6194 // just use VDUPLANE. We can only do this if the lane being extracted
6195 // is at a constant index, as the VDUP from lane instructions only have
6196 // constant-index forms.
6197 ConstantSDNode *constIndex;
6198 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6199 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6200 // We need to create a new undef vector to use for the VDUPLANE if the
6201 // size of the vector from which we get the value is different than the
6202 // size of the vector that we need to create. We will insert the element
6203 // such that the register coalescer will remove unnecessary copies.
6204 if (VT != Value->getOperand(0).getValueType()) {
6205 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6206 VT.getVectorNumElements();
6207 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6208 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6209 Value, DAG.getConstant(index, dl, MVT::i32)),
6210 DAG.getConstant(index, dl, MVT::i32));
6211 } else
6212 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6213 Value->getOperand(0), Value->getOperand(1));
6214 } else
6215 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6216
6217 if (!usesOnlyOneValue) {
6218 // The dominant value was splatted as 'N', but we now have to insert
6219 // all differing elements.
6220 for (unsigned I = 0; I < NumElts; ++I) {
6221 if (Op.getOperand(I) == Value)
6222 continue;
6223 SmallVector<SDValue, 3> Ops;
6224 Ops.push_back(N);
6225 Ops.push_back(Op.getOperand(I));
6226 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6227 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6228 }
6229 }
6230 return N;
6231 }
6232 if (VT.getVectorElementType().isFloatingPoint()) {
6233 SmallVector<SDValue, 8> Ops;
6234 for (unsigned i = 0; i < NumElts; ++i)
6235 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6236 Op.getOperand(i)));
6237 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6238 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6239 Val = LowerBUILD_VECTOR(Val, DAG, ST);
6240 if (Val.getNode())
6241 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6242 }
6243 if (usesOnlyOneValue) {
6244 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6245 if (isConstant && Val.getNode())
6246 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6247 }
6248 }
6249
6250 // If all elements are constants and the case above didn't get hit, fall back
6251 // to the default expansion, which will generate a load from the constant
6252 // pool.
6253 if (isConstant)
6254 return SDValue();
6255
6256 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6257 if (NumElts >= 4) {
6258 SDValue shuffle = ReconstructShuffle(Op, DAG);
6259 if (shuffle != SDValue())
6260 return shuffle;
6261 }
6262
6263 if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6264 // If we haven't found an efficient lowering, try splitting a 128-bit vector
6265 // into two 64-bit vectors; we might discover a better way to lower it.
6266 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6267 EVT ExtVT = VT.getVectorElementType();
6268 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6269 SDValue Lower =
6270 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6271 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6272 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6273 SDValue Upper = DAG.getBuildVector(
6274 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6275 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6276 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6277 if (Lower && Upper)
6278 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6279 }
6280
6281 // Vectors with 32- or 64-bit elements can be built by directly assigning
6282 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6283 // will be legalized.
6284 if (EltSize >= 32) {
6285 // Do the expansion with floating-point types, since that is what the VFP
6286 // registers are defined to use, and since i64 is not legal.
6287 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6288 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6289 SmallVector<SDValue, 8> Ops;
6290 for (unsigned i = 0; i < NumElts; ++i)
6291 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6292 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6293 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6294 }
6295
6296 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6297 // know the default expansion would otherwise fall back on something even
6298 // worse. For a vector with one or two non-undef values, that's
6299 // scalar_to_vector for the elements followed by a shuffle (provided the
6300 // shuffle is valid for the target) and materialization element by element
6301 // on the stack followed by a load for everything else.
6302 if (!isConstant && !usesOnlyOneValue) {
6303 SDValue Vec = DAG.getUNDEF(VT);
6304 for (unsigned i = 0 ; i < NumElts; ++i) {
6305 SDValue V = Op.getOperand(i);
6306 if (V.isUndef())
6307 continue;
6308 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6309 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6310 }
6311 return Vec;
6312 }
6313
6314 return SDValue();
6315}
6316
6317// Gather data to see if the operation can be modelled as a
6318// shuffle in combination with VEXTs.
6319SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6320 SelectionDAG &DAG) const {
6321   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6322 SDLoc dl(Op);
6323 EVT VT = Op.getValueType();
6324 unsigned NumElts = VT.getVectorNumElements();
6325
6326 struct ShuffleSourceInfo {
6327 SDValue Vec;
6328 unsigned MinElt = std::numeric_limits<unsigned>::max();
6329 unsigned MaxElt = 0;
6330
6331 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6332 // be compatible with the shuffle we intend to construct. As a result
6333 // ShuffleVec will be some sliding window into the original Vec.
6334 SDValue ShuffleVec;
6335
6336 // Code should guarantee that element i in Vec starts at element "WindowBase
6337 // + i * WindowScale in ShuffleVec".
6338 int WindowBase = 0;
6339 int WindowScale = 1;
6340
6341 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6342
6343 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6344 };
6345
6346 // First gather all vectors used as an immediate source for this BUILD_VECTOR
6347 // node.
6348 SmallVector<ShuffleSourceInfo, 2> Sources;
6349 for (unsigned i = 0; i < NumElts; ++i) {
6350 SDValue V = Op.getOperand(i);
6351 if (V.isUndef())
6352 continue;
6353 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6354 // A shuffle can only come from building a vector from various
6355 // elements of other vectors.
6356 return SDValue();
6357 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6358 // Furthermore, shuffles require a constant mask, whereas extractelts
6359 // accept variable indices.
6360 return SDValue();
6361 }
6362
6363 // Add this element source to the list if it's not already there.
6364 SDValue SourceVec = V.getOperand(0);
6365 auto Source = llvm::find(Sources, SourceVec);
6366 if (Source == Sources.end())
6367 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6368
6369 // Update the minimum and maximum lane number seen.
6370 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6371 Source->MinElt = std::min(Source->MinElt, EltNo);
6372 Source->MaxElt = std::max(Source->MaxElt, EltNo);
6373 }
6374
6375 // Currently only do something sane when at most two source vectors
6376 // are involved.
6377 if (Sources.size() > 2)
6378 return SDValue();
6379
6380 // Find out the smallest element size among result and two sources, and use
6381 // it as element size to build the shuffle_vector.
6382 EVT SmallestEltTy = VT.getVectorElementType();
6383 for (auto &Source : Sources) {
6384 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6385 if (SrcEltTy.bitsLT(SmallestEltTy))
6386 SmallestEltTy = SrcEltTy;
6387 }
6388 unsigned ResMultiplier =
6389 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6390 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6391 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6392
6393 // If the source vector is too wide or too narrow, we may nevertheless be able
6394 // to construct a compatible shuffle either by concatenating it with UNDEF or
6395 // extracting a suitable range of elements.
6396 for (auto &Src : Sources) {
6397 EVT SrcVT = Src.ShuffleVec.getValueType();
6398
6399 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6400 continue;
6401
6402 // This stage of the search produces a source with the same element type as
6403 // the original, but with a total width matching the BUILD_VECTOR output.
6404 EVT EltVT = SrcVT.getVectorElementType();
6405 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6406 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6407
6408 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6409 if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6410 return SDValue();
6411 // We can pad out the smaller vector for free, so if it's part of a
6412 // shuffle...
6413 Src.ShuffleVec =
6414 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6415 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6416 continue;
6417 }
6418
6419 if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6420 return SDValue();
6421
6422 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6423 // Span too large for a VEXT to cope
6424 return SDValue();
6425 }
6426
6427 if (Src.MinElt >= NumSrcElts) {
6428 // The extraction can just take the second half
6429 Src.ShuffleVec =
6430 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6431 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6432 Src.WindowBase = -NumSrcElts;
6433 } else if (Src.MaxElt < NumSrcElts) {
6434 // The extraction can just take the first half
6435 Src.ShuffleVec =
6436 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6437 DAG.getConstant(0, dl, MVT::i32));
6438 } else {
6439 // An actual VEXT is needed
6440 SDValue VEXTSrc1 =
6441 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6442 DAG.getConstant(0, dl, MVT::i32));
6443 SDValue VEXTSrc2 =
6444 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6445 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6446
6447 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6448 VEXTSrc2,
6449 DAG.getConstant(Src.MinElt, dl, MVT::i32));
6450 Src.WindowBase = -Src.MinElt;
6451 }
6452 }
6453
6454 // Another possible incompatibility occurs from the vector element types. We
6455 // can fix this by bitcasting the source vectors to the same type we intend
6456 // for the shuffle.
6457 for (auto &Src : Sources) {
6458 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6459 if (SrcEltTy == SmallestEltTy)
6460 continue;
6461 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6462 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6463 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6464 Src.WindowBase *= Src.WindowScale;
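Example of the scaling above: if a v2i32 source is bitcast to a v8i8 ShuffleVT, WindowScale becomes 32/8 = 4, so original element j of that source occupies shuffle lanes WindowBase + 4*j through WindowBase + 4*j + 3.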
6465 }
6466
6467 // Final sanity check before we try to actually produce a shuffle.
6468 DEBUG(
6469 for (auto Src : Sources)
6470 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
6471 );
6472
6473 // The stars all align, our next step is to produce the mask for the shuffle.
6474 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6475 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6476 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6477 SDValue Entry = Op.getOperand(i);
6478 if (Entry.isUndef())
6479 continue;
6480
6481 auto Src = llvm::find(Sources, Entry.getOperand(0));
6482 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6483
6484 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6485 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6486 // segment.
6487 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6488 int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6489 VT.getScalarSizeInBits());
6490 int LanesDefined = BitsDefined / BitsPerShuffleLane;
6491
6492 // This source is expected to fill ResMultiplier lanes of the final shuffle,
6493 // starting at the appropriate offset.
6494 int *LaneMask = &Mask[i * ResMultiplier];
6495
6496 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6497 ExtractBase += NumElts * (Src - Sources.begin());
6498 for (int j = 0; j < LanesDefined; ++j)
6499 LaneMask[j] = ExtractBase + j;
6500 }
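Note on the loop above: the "NumElts * (Src - Sources.begin())" term offsets lanes taken from the second source by NumElts, matching the usual two-operand shuffle convention where mask values in [0, NumElts) select from the first operand and values in [NumElts, 2*NumElts) select from the second.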
6501
6502 // Final check before we try to produce nonsense...
6503 if (!isShuffleMaskLegal(Mask, ShuffleVT))
6504 return SDValue();
6505
6506 // We can't handle more than two sources. This should have already
6507 // been checked before this point.
6508 assert(Sources.size() <= 2 && "Too many sources!");
6509
6510 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6511 for (unsigned i = 0; i < Sources.size(); ++i)
6512 ShuffleOps[i] = Sources[i].ShuffleVec;
6513
6514 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6515 ShuffleOps[1], Mask);
6516 return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6517}
6518
6519/// isShuffleMaskLegal - Targets can use this to indicate that they only
6520/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6521/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6522/// are assumed to be legal.
6523bool
6524ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
6525 EVT VT) const {
6526 if (VT.getVectorNumElements() == 4 &&
6527 (VT.is128BitVector() || VT.is64BitVector())) {
6528 unsigned PFIndexes[4];
6529 for (unsigned i = 0; i != 4; ++i) {
6530 if (M[i] < 0)
6531 PFIndexes[i] = 8;
6532 else
6533 PFIndexes[i] = M[i];
6534 }
6535
6536 // Compute the index in the perfect shuffle table.
6537 unsigned PFTableIndex =
6538 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
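Each mask entry selects one of eight input lanes, with 8 standing for "undef", so the four entries are packed in base 9. For example, the mask <0, 4, 1, 5> gives PFTableIndex = ((0*9 + 4)*9 + 1)*9 + 5 = 338.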
6539 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6540 unsigned Cost = (PFEntry >> 30);
6541
6542 if (Cost <= 4)
6543 return true;
6544 }
6545
6546 bool ReverseVEXT, isV_UNDEF;
6547 unsigned Imm, WhichResult;
6548
6549 unsigned EltSize = VT.getScalarSizeInBits();
6550 return (EltSize >= 32 ||
6551 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6552 isVREVMask(M, VT, 64) ||
6553 isVREVMask(M, VT, 32) ||
6554 isVREVMask(M, VT, 16) ||
6555 isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6556 isVTBLMask(M, VT) ||
6557 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6558 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6559}
6560
6561/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6562/// the specified operations to build the shuffle.
6563static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6564 SDValue RHS, SelectionDAG &DAG,
6565 const SDLoc &dl) {
6566 unsigned OpNum = (PFEntry >> 26) & 0x0F;
6567 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6568 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
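As decoded here, each PFEntry packs: bits [31:30] the cost, bits [29:26] the opcode (one of the OP_* values below), bits [25:13] LHSID, and bits [12:0] RHSID, where each 13-bit ID is itself a base-9 packed four-element mask of the same form as PFTableIndex.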
6569
6570 enum {
6571 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6572 OP_VREV,
6573 OP_VDUP0,
6574 OP_VDUP1,
6575 OP_VDUP2,
6576 OP_VDUP3,
6577 OP_VEXT1,
6578 OP_VEXT2,
6579 OP_VEXT3,
6580 OP_VUZPL, // VUZP, left result
6581 OP_VUZPR, // VUZP, right result
6582 OP_VZIPL, // VZIP, left result
6583 OP_VZIPR, // VZIP, right result
6584 OP_VTRNL, // VTRN, left result
6585 OP_VTRNR // VTRN, right result
6586 };
6587
6588 if (OpNum == OP_COPY) {
6589 if (LHSID == (1*9+2)*9+3) return LHS;
6590 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6591 return RHS;
6592 }
6593
6594 SDValue OpLHS, OpRHS;
6595 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6596 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6597 EVT VT = OpLHS.getValueType();
6598
6599 switch (OpNum) {
6600 default: llvm_unreachable("Unknown shuffle opcode!");
6601 case OP_VREV:
6602 // VREV divides the vector in half and swaps within the half.
6603 if (VT.getVectorElementType() == MVT::i32 ||
6604 VT.getVectorElementType() == MVT::f32)
6605 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6606 // vrev <4 x i16> -> VREV32
6607 if (VT.getVectorElementType() == MVT::i16)
6608 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6609 // vrev <4 x i8> -> VREV16
6610 assert(VT.getVectorElementType() == MVT::i8);
6611 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
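Concretely, for a v4i16 operand <a, b, c, d>: VREV64 produces <d, c, b, a> and VREV32 produces <b, a, d, c>; VREV16 swaps bytes within each 16-bit halfword, so a v4i8 <a, b, c, d> becomes <b, a, d, c>.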
6612 case OP_VDUP0:
6613 case OP_VDUP1:
6614 case OP_VDUP2:
6615 case OP_VDUP3:
6616 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6617 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6618 case OP_VEXT1:
6619 case OP_VEXT2:
6620 case OP_VEXT3:
6621 return DAG.getNode(ARMISD::VEXT, dl, VT,
6622 OpLHS, OpRHS,
6623 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6624 case OP_VUZPL:
6625 case OP_VUZPR:
6626 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6627 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6628 case OP_VZIPL:
6629 case OP_VZIPR:
6630 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6631 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6632 case OP_VTRNL:
6633 case OP_VTRNR:
6634 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6635 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6636 }
6637}
6638
6639static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6640 ArrayRef<int> ShuffleMask,
6641 SelectionDAG &DAG) {
6642 // Check to see if we can use the VTBL instruction.
6643 SDValue V1 = Op.getOperand(0);
6644 SDValue V2 = Op.getOperand(1);
6645 SDLoc DL(Op);
6646
6647 SmallVector<SDValue, 8> VTBLMask;
6648 for (ArrayRef<int>::iterator
6649 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6650 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6651
6652 if (V2.getNode()->isUndef())
6653 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6654 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6655
6656 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6657 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6658}
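VTBL treats the mask vector as per-byte indices into its table registers. For example, shuffling a single v8i8 input with mask <7, 6, 5, 4, 3, 2, 1, 0> becomes a VTBL1 with that byte-index vector, reversing the register; out-of-range indices produce zero in the corresponding result byte.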
6659
6660static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6661 SelectionDAG &DAG) {
6662 SDLoc DL(Op);
6663 SDValue OpLHS = Op.getOperand(0);
6664 EVT VT = OpLHS.getValueType();
6665
6666 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6667 "Expect an v8i16/v16i8 type");
6668 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6669 // For a v16i8 type: after the VREV64 the element order is <7, ..., 0, 15, ..., 8>.
6670 // A VEXT then swaps the two double words, moving the last 8 bytes into the
6671 // bottom double word and the first 8 bytes into the top. The v8i16 case is similar.
6672 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6673 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6674 DAG.getConstant(ExtractNum, DL, MVT::i32));
6675}
6676
6677static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6678 SDValue V1 = Op.getOperand(0);
6679 SDValue V2 = Op.getOperand(1);
6680 SDLoc dl(Op);
6681 EVT VT = Op.getValueType();
6682 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6683
6684 // Convert shuffles that are directly supported on NEON to target-specific
6685 // DAG nodes, instead of keeping them as shuffles and matching them again
6686 // during code selection. This is more efficient and avoids the possibility
6687 // of inconsistencies between legalization and selection.
6688 // FIXME: floating-point vectors should be canonicalized to integer vectors
6689 // of the same type so that they get CSEd properly.
6690 ArrayRef<int> ShuffleMask = SVN->getMask();
6691
6692 unsigned EltSize = VT.getScalarSizeInBits();
6693 if (EltSize <= 32) {
6694 if (SVN->isSplat()) {
6695 int Lane = SVN->getSplatIndex();
6696 // If this is undef splat, generate it via "just" vdup, if possible.
6697 if (Lane == -1) Lane = 0;
6698
6699 // Test if V1 is a SCALAR_TO_VECTOR.
6700 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6701 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6702 }
6703 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6704 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6705 // reaches it).
6706 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6707 !isa<ConstantSDNode>(V1.getOperand(0))) {
6708 bool IsScalarToVector = true;
6709 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6710 if (!V1.getOperand(i).isUndef()) {
6711 IsScalarToVector = false;
6712 break;
6713 }
6714 if (IsScalarToVector)
6715 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6716 }
6717 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6718 DAG.getConstant(Lane, dl, MVT::i32));
6719 }
6720
6721 bool ReverseVEXT;
6722 unsigned Imm;
6723 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
6724 if (ReverseVEXT)
6725 std::swap(V1, V2);
6726 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6727 DAG.getConstant(Imm, dl, MVT::i32));
6728 }
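ARMISD::VEXT extracts NumElts consecutive elements from the concatenation (V1, V2) starting at lane Imm. For example, a v4i16 shuffle mask <1, 2, 3, 4> becomes VEXT #1; if the run instead starts in V2 (ReverseVEXT), the operands are swapped first, as above.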
6729
6730 if (isVREVMask(ShuffleMask, VT, 64))
6731 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6732 if (isVREVMask(ShuffleMask, VT, 32))
6733 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6734 if (isVREVMask(ShuffleMask, VT, 16))
6735 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6736
6737 if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6738 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6739 DAG.getConstant(Imm, dl, MVT::i32));
6740 }
6741
6742 // Check for Neon shuffles that modify both input vectors in place.
6743 // If both results are used, i.e., if there are two shuffles with the same
6744 // source operands and with masks corresponding to both results of one of
6745 // these operations, DAG memoization will ensure that a single node is
6746 // used for both shuffles.
6747 unsigned WhichResult;
6748 bool isV_UNDEF;
6749 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6750 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
6751 if (isV_UNDEF)
6752 V2 = V1;
6753 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
6754 .getValue(WhichResult);
6755 }
6756
6757 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
6758 // shuffles that produce a result larger than their operands with:
6759 // shuffle(concat(v1, undef), concat(v2, undef))
6760 // ->
6761 // shuffle(concat(v1, v2), undef)
6762 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
6763 //
6764 // This is useful in the general case, but there are special cases where
6765 // native shuffles produce larger results: the two-result ops.
6766 //
6767 // Look through the concat when lowering them:
6768 // shuffle(concat(v1, v2), undef)
6769 // ->
6770 // concat(VZIP(v1, v2):0, :1)
6771 //
6772 if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
6773 SDValue SubV1 = V1->getOperand(0);
6774 SDValue SubV2 = V1->getOperand(1);
6775 EVT SubVT = SubV1.getValueType();
6776
6777 // We expect these to have been canonicalized to -1.
6778 assert(llvm::all_of(ShuffleMask, [&](int i) {
6779 return i < (int)VT.getVectorNumElements();
6780 }) && "Unexpected shuffle index into UNDEF operand!");
6781
6782 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6783 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
6784 if (isV_UNDEF)
6785 SubV2 = SubV1;
6786 assert((WhichResult == 0) &&
6787 "In-place shuffle of concat can only have one result!");
6788 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
6789 SubV1, SubV2);
6790 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
6791 Res.getValue(1));
6792 }
6793 }
6794 }
6795
6796 // If the shuffle is not directly supported and it has 4 elements, use
6797 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6798 unsigned NumElts = VT.getVectorNumElements();
6799 if (NumElts == 4) {
6800 unsigned PFIndexes[4];
6801 for (unsigned i = 0; i != 4; ++i) {
6802 if (ShuffleMask[i] < 0)
6803 PFIndexes[i] = 8;
6804 else
6805 PFIndexes[i] = ShuffleMask[i];
6806 }
6807
6808 // Compute the index in the perfect shuffle table.
6809 unsigned PFTableIndex =
6810 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6811 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6812 unsigned Cost = (PFEntry >> 30);
6813
6814 if (Cost <= 4)
6815 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6816 }
6817
6818 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
6819 if (EltSize >= 32) {
6820 // Do the expansion with floating-point types, since that is what the VFP
6821 // registers are defined to use, and since i64 is not legal.
6822 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6823 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6824 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
6825 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
6826 SmallVector<SDValue, 8> Ops;
6827 for (unsigned i = 0; i < NumElts; ++i) {
6828 if (ShuffleMask[i] < 0)
6829 Ops.push_back(DAG.getUNDEF(EltVT));
6830 else
6831 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
6832 ShuffleMask[i] < (int)NumElts ? V1 : V2,
6833 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
6834 dl, MVT::i32)));
6835 }
6836 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6837 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6838 }
6839
6840 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
6841 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
6842
6843 if (VT == MVT::v8i8)
6844 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
6845 return NewOp;
6846
6847 return SDValue();
6848}
6849
6850static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6851 // INSERT_VECTOR_ELT is legal only for immediate indexes.
6852 SDValue Lane = Op.getOperand(2);
6853 if (!isa<ConstantSDNode>(Lane))
6854 return SDValue();
6855
6856 return Op;
6857}
6858
6859static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6860 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
6861 SDValue Lane = Op.getOperand(1);
6862 if (!isa<ConstantSDNode>(Lane))
6863 return SDValue();
6864
6865 SDValue Vec = Op.getOperand(0);
6866 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
6867 SDLoc dl(Op);
6868 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
6869 }
6870
6871 return Op;
6872}
6873
6874static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
6875 // The only time a CONCAT_VECTORS operation can have legal types is when
6876 // two 64-bit vectors are concatenated to a 128-bit vector.
6877 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
6878 "unexpected CONCAT_VECTORS");
6879 SDLoc dl(Op);
6880 SDValue Val = DAG.getUNDEF(MVT::v2f64);
6881 SDValue Op0 = Op.getOperand(0);
6882 SDValue Op1 = Op.getOperand(1);
6883 if (!Op0.isUndef())
6884 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6885 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
6886 DAG.getIntPtrConstant(0, dl));
6887 if (!Op1.isUndef())
6888 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6889 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
6890 DAG.getIntPtrConstant(1, dl));
6891 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
6892}
6893
6894/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
6895/// element has been zero/sign-extended, depending on the isSigned parameter,
6896/// from an integer type half its size.
6897static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
6898 bool isSigned) {
6899 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
6900 EVT VT = N->getValueType(0);
6901 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
6902 SDNode *BVN = N->getOperand(0).getNode();
6903 if (BVN->getValueType(0) != MVT::v4i32 ||
6904 BVN->getOpcode() != ISD::BUILD_VECTOR)
6905 return false;
6906 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6907 unsigned HiElt = 1 - LoElt;
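In this layout, 64-bit element i of the original v2i64 is assembled from v4i32 elements 2*i + LoElt (low word) and 2*i + HiElt (high word); a sign-extended value therefore needs its high word equal to the sign-extension of the low word, and a zero-extended value needs a zero high word, which is what the checks below verify.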
6908 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
6909 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
6910 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
6911 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
6912 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
6913 return false;
6914 if (isSigned) {
6915 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
6916 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
6917 return true;
6918 } else {
6919 if (Hi0->isNullValue() && Hi1->isNullValue())
6920 return true;
6921 }
6922 return false;
6923 }
6924
6925 if (N->getOpcode() != ISD::BUILD_VECTOR)
6926 return false;
6927
6928 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
6929 SDNode *Elt = N->getOperand(i).getNode();
6930 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
6931 unsigned EltSize = VT.getScalarSizeInBits();
6932 unsigned HalfSize = EltSize / 2;
6933 if (isSigned) {
6934 if (!isIntN(HalfSize, C->getSExtValue()))
6935 return false;
6936 } else {
6937 if (!isUIntN(HalfSize, C->getZExtValue()))
6938 return false;
6939 }
6940 continue;
6941 }
6942 return false;
6943 }
6944
6945 return true;
6946}
6947
6948/// isSignExtended - Check if a node is a vector value that is sign-extended
6949/// or a constant BUILD_VECTOR with sign-extended elements.
6950static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
6951 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
6952 return true;
6953 if (isExtendedBUILD_VECTOR(N, DAG, true))
6954 return true;
6955 return false;
6956}
6957
6958/// isZeroExtended - Check if a node is a vector value that is zero-extended
6959/// or a constant BUILD_VECTOR with zero-extended elements.
6960static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
6961 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
6962 return true;
6963 if (isExtendedBUILD_VECTOR(N, DAG, false))
6964 return true;
6965 return false;
6966}
6967
6968static EVT getExtensionTo64Bits(const EVT &OrigVT) {
6969 if (OrigVT.getSizeInBits() >= 64)
6970 return OrigVT;
6971
6972 assert(OrigVT.isSimple() && "Expecting a simple value type");
6973
6974 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
6975 switch (OrigSimpleTy) {
6976 default: llvm_unreachable("Unexpected Vector Type");
6977 case MVT::v2i8:
6978 case MVT::v2i16:
6979 return MVT::v2i32;
6980 case MVT::v4i8:
6981 return MVT::v4i16;
6982 }
6983}
6984
6985/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
6986/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
6987/// We insert the required extension here to get the vector to fill a D register.
6988static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
6989 const EVT &OrigTy,
6990 const EVT &ExtTy,
6991 unsigned ExtOpcode) {
6992 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
6993 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
6994 // 64-bits we need to insert a new extension so that it will be 64-bits.
6995 assert(ExtTy.is128BitVector() && "Unexpected extension size");
6996 if (OrigTy.getSizeInBits() >= 64)
6997 return N;
6998
6999 // Must extend size to at least 64 bits to be used as an operand for VMULL.
7000 EVT NewVT = getExtensionTo64Bits(OrigTy);
7001
7002 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
7003}
7004
7005/// SkipLoadExtensionForVMULL - return a load of the original vector size that
7006/// does not do any sign/zero extension. If the original vector is less
7007/// than 64 bits, an appropriate extension will be added after the load to
7008/// reach a total size of 64 bits. We have to add the extension separately
7009/// because ARM does not have a sign/zero extending load for vectors.
7010static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
7011 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
7012
7013 // The load already has the right type.
7014 if (ExtendedTy == LD->getMemoryVT())
7015 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
7016 LD->getBasePtr(), LD->getPointerInfo(),
7017 LD->getAlignment(), LD->getMemOperand()->getFlags());
7018
7019 // We need to create a zextload/sextload. We cannot just create a load
7020 // followed by a sext/zext node because LowerMUL is also run during normal
7021 // operation legalization where we can't create illegal types.
7022 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
7023 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
7024 LD->getMemoryVT(), LD->getAlignment(),
7025 LD->getMemOperand()->getFlags());
7026}
7027
7028/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
7029/// extending load, or BUILD_VECTOR with extended elements, return the
7030/// unextended value. The unextended vector should be 64 bits so that it can
7031/// be used as an operand to a VMULL instruction. If the original vector size
7032 /// before extension is less than 64 bits we add an extension to resize
7033/// the vector to 64 bits.
7034static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
7035 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
7036 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
7037 N->getOperand(0)->getValueType(0),
7038 N->getValueType(0),
7039 N->getOpcode());
7040
7041 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7042 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7043 "Expected extending load");
7044
7045 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7046 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7047 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7048 SDValue extLoad =
7049 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7050 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7051
7052 return newLoad;
7053 }
7054
7055 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7056 // have been legalized as a BITCAST from v4i32.
7057 if (N->getOpcode() == ISD::BITCAST) {
7058 SDNode *BVN = N->getOperand(0).getNode();
7059 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7060 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7061 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7062 return DAG.getBuildVector(
7063 MVT::v2i32, SDLoc(N),
7064 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7065 }
7066 // Construct a new BUILD_VECTOR with elements truncated to half the size.
7067 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7068 EVT VT = N->getValueType(0);
7069 unsigned EltSize = VT.getScalarSizeInBits() / 2;
7070 unsigned NumElts = VT.getVectorNumElements();
7071 MVT TruncVT = MVT::getIntegerVT(EltSize);
7072 SmallVector<SDValue, 8> Ops;
7073 SDLoc dl(N);
7074 for (unsigned i = 0; i != NumElts; ++i) {
7075 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7076 const APInt &CInt = C->getAPIntValue();
7077 // Element types smaller than 32 bits are not legal, so use i32 elements.
7078 // The values are implicitly truncated so sext vs. zext doesn't matter.
7079 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7080 }
7081 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7082}
7083
7084static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7085 unsigned Opcode = N->getOpcode();
7086 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7087 SDNode *N0 = N->getOperand(0).getNode();
7088 SDNode *N1 = N->getOperand(1).getNode();
7089 return N0->hasOneUse() && N1->hasOneUse() &&
7090 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7091 }
7092 return false;
7093}
7094
7095static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7096 unsigned Opcode = N->getOpcode();
7097 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7098 SDNode *N0 = N->getOperand(0).getNode();
7099 SDNode *N1 = N->getOperand(1).getNode();
7100 return N0->hasOneUse() && N1->hasOneUse() &&
7101 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7102 }
7103 return false;
7104}
7105
7106static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7107 // Multiplications are only custom-lowered for 128-bit vectors so that
7108 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
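For example, (mul (sext v8i8 %a to v8i16), (sext v8i8 %b to v8i16)) is recognized here and emitted as a single VMULLs of the original v8i8 operands, producing the v8i16 product directly.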
7109 EVT VT = Op.getValueType();
7110 assert(VT.is128BitVector() && VT.isInteger() &&
7111 "unexpected type for custom-lowering ISD::MUL");
7112 SDNode *N0 = Op.getOperand(0).getNode();
7113 SDNode *N1 = Op.getOperand(1).getNode();
7114 unsigned NewOpc = 0;
7115 bool isMLA = false;
7116 bool isN0SExt = isSignExtended(N0, DAG);
7117 bool isN1SExt = isSignExtended(N1, DAG);
7118 if (isN0SExt && isN1SExt)
7119 NewOpc = ARMISD::VMULLs;
7120 else {
7121 bool isN0ZExt = isZeroExtended(N0, DAG);
7122 bool isN1ZExt = isZeroExtended(N1, DAG);
7123 if (isN0ZExt && isN1ZExt)
7124 NewOpc = ARMISD::VMULLu;
7125 else if (isN1SExt || isN1ZExt) {
7126 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7127 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7128 if (isN1SExt && isAddSubSExt(N0, DAG)) {
7129 NewOpc = ARMISD::VMULLs;
7130 isMLA = true;
7131 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7132 NewOpc = ARMISD::VMULLu;
7133 isMLA = true;
7134 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7135 std::swap(N0, N1);
7136 NewOpc = ARMISD::VMULLu;
7137 isMLA = true;
7138 }
7139 }
7140
7141 if (!NewOpc) {
7142 if (VT == MVT::v2i64)
7143 // Fall through to expand this. It is not legal.
7144 return SDValue();
7145 else
7146 // Other vector multiplications are legal.
7147 return Op;
7148 }
7149 }
7150
7151 // Legalize to a VMULL instruction.
7152 SDLoc DL(Op);
7153 SDValue Op0;
7154 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7155 if (!isMLA) {
7156 Op0 = SkipExtensionForVMULL(N0, DAG);
7157 assert(Op0.getValueType().is64BitVector() &&
7158 Op1.getValueType().is64BitVector() &&
7159 "unexpected types for extended operands to VMULL");
7160 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7161 }
7162
7163 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
7164 // isel lowering to take advantage of no-stall back to back vmul + vmla.
7165 // vmull q0, d4, d6
7166 // vmlal q0, d5, d6
7167 // is faster than
7168 // vaddl q0, d4, d5
7169 // vmovl q1, d6
7170 // vmul q0, q0, q1
7171 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7172 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7173 EVT Op1VT = Op1.getValueType();
7174 return DAG.getNode(N0->getOpcode(), DL, VT,
7175 DAG.getNode(NewOpc, DL, VT,
7176 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7177 DAG.getNode(NewOpc, DL, VT,
7178 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7179}
7180
7181static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7182 SelectionDAG &DAG) {
7183 // TODO: Should this propagate fast-math-flags?
7184
7185 // Convert to float
7186 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7187 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7188 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7189 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7190 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7191 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7192 // Get reciprocal estimate.
7193 // float4 recip = vrecpeq_f32(yf);
7194 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7195 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7196 Y);
7197 // Because char has a smaller range than uchar, we can actually get away
7198 // without any newton steps. This requires that we use a weird bias
7199 // of 0xb000, however (again, this has been exhaustively tested).
7200 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7201 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7202 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7203 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7204 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7205 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7206 // Convert back to short.
7207 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7208 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7209 return X;
7210}
7211
7212static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7213 SelectionDAG &DAG) {
7214 // TODO: Should this propagate fast-math-flags?
7215
7216 SDValue N2;
7217 // Convert to float.
7218 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7219 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7220 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7221 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7222 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7223 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7224
7225 // Use reciprocal estimate and one refinement step.
7226 // float4 recip = vrecpeq_f32(yf);
7227 // recip *= vrecpsq_f32(yf, recip);
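VRECPS(a, b) computes (2.0f - a*b), so multiplying the estimate by vrecpsq_f32(yf, recip) performs one Newton-Raphson refinement of recip towards 1/yf.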
7228 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7229 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7230 N1);
7231 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7232 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7233 N1, N2);
7234 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7235 // Because short has a smaller range than ushort, we can actually get away
7236 // with only a single newton step. This requires that we use a weird bias
7237 // of 89, however (again, this has been exhaustively tested).
7238 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7239 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7240 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7241 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7242 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7243 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7244 // Convert back to integer and return.
7245 // return vmovn_s32(vcvt_s32_f32(result));
7246 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7247 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7248 return N0;
7249}
7250
7251static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7252 EVT VT = Op.getValueType();
7253 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7254 "unexpected type for custom-lowering ISD::SDIV");
7255
7256 SDLoc dl(Op);
7257 SDValue N0 = Op.getOperand(0);
7258 SDValue N1 = Op.getOperand(1);
7259 SDValue N2, N3;
7260
7261 if (VT == MVT::v8i8) {
7262 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7263 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7264
7265 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7266 DAG.getIntPtrConstant(4, dl));
7267 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7268 DAG.getIntPtrConstant(4, dl));
7269 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7270 DAG.getIntPtrConstant(0, dl));
7271 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7272 DAG.getIntPtrConstant(0, dl));
7273
7274 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7275 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7276
7277 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7278 N0 = LowerCONCAT_VECTORS(N0, DAG);
7279
7280 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7281 return N0;
7282 }
7283 return LowerSDIV_v4i16(N0, N1, dl, DAG);
7284}
7285
7286static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7287 // TODO: Should this propagate fast-math-flags?
7288 EVT VT = Op.getValueType();
7289 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7290 "unexpected type for custom-lowering ISD::UDIV");
7291
7292 SDLoc dl(Op);
7293 SDValue N0 = Op.getOperand(0);
7294 SDValue N1 = Op.getOperand(1);
7295 SDValue N2, N3;
7296
7297 if (VT == MVT::v8i8) {
7298 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7299 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7300
7301 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7302 DAG.getIntPtrConstant(4, dl));
7303 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7304 DAG.getIntPtrConstant(4, dl));
7305 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7306 DAG.getIntPtrConstant(0, dl));
7307 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7308 DAG.getIntPtrConstant(0, dl));
7309
7310 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7311 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7312
7313 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7314 N0 = LowerCONCAT_VECTORS(N0, DAG);
7315
7316 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7317 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7318 MVT::i32),
7319 N0);
7320 return N0;
7321 }
7322
7323 // v4i16 sdiv ... Convert to float.
7324 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7325 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7326 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7327 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7328 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7329 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7330
7331 // Use reciprocal estimate and two refinement steps.
7332 // float4 recip = vrecpeq_f32(yf);
7333 // recip *= vrecpsq_f32(yf, recip);
7334 // recip *= vrecpsq_f32(yf, recip);
7335 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7336 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7337 BN1);
7338 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7339 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7340 BN1, N2);
7341 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7342 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7343 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7344 BN1, N2);
7345 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7346 // Simply multiplying by the reciprocal estimate can leave us a few ulps
7347 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7348 // and that it will never cause us to return an answer too large).
7349 // float4 result = as_float4(as_int4(xf*recip) + 2);
7350 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7351 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7352 N1 = DAG.getConstant(2, dl, MVT::v4i32);
7353 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7354 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7355 // Convert back to integer and return.
7356 // return vmovn_u32(vcvt_s32_f32(result));
7357 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7358 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7359 return N0;
7360}
7361
7362static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
7363 EVT VT = Op.getNode()->getValueType(0);
7364 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7365
7366 unsigned Opc;
7367 bool ExtraOp = false;
7368 switch (Op.getOpcode()) {
7369 default: llvm_unreachable("Invalid code");
7370 case ISD::ADDC: Opc = ARMISD::ADDC; break;
7371 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
7372 case ISD::SUBC: Opc = ARMISD::SUBC; break;
7373 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
7374 }
7375
7376 if (!ExtraOp)
7377 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7378 Op.getOperand(1));
7379 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7380 Op.getOperand(1), Op.getOperand(2));
7381}
7382
7383SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7384 assert(Subtarget->isTargetDarwin());
7385
7386 // For iOS, we want to call an alternative entry point: __sincos_stret,
7387 // where the return values are passed via sret.
7388 SDLoc dl(Op);
7389 SDValue Arg = Op.getOperand(0);
7390 EVT ArgVT = Arg.getValueType();
7391 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7392 auto PtrVT = getPointerTy(DAG.getDataLayout());
7393
7394 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7395 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7396
7397 // Pair of floats / doubles used to pass the result.
7398 Type *RetTy = StructType::get(ArgTy, ArgTy);
7399 auto &DL = DAG.getDataLayout();
7400
7401 ArgListTy Args;
7402 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7403 SDValue SRet;
7404 if (ShouldUseSRet) {
7405 // Create stack object for sret.
7406 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7407 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7408 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7409 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7410
7411 ArgListEntry Entry;
7412 Entry.Node = SRet;
7413 Entry.Ty = RetTy->getPointerTo();
7414 Entry.IsSExt = false;
7415 Entry.IsZExt = false;
7416 Entry.IsSRet = true;
7417 Args.push_back(Entry);
7418 RetTy = Type::getVoidTy(*DAG.getContext());
7419 }
7420
7421 ArgListEntry Entry;
7422 Entry.Node = Arg;
7423 Entry.Ty = ArgTy;
7424 Entry.IsSExt = false;
7425 Entry.IsZExt = false;
7426 Args.push_back(Entry);
7427
7428 const char *LibcallName =
7429 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
7430 RTLIB::Libcall LC =
7431 (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
7432 CallingConv::ID CC = getLibcallCallingConv(LC);
7433 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7434
7435 TargetLowering::CallLoweringInfo CLI(DAG);
7436 CLI.setDebugLoc(dl)
7437 .setChain(DAG.getEntryNode())
7438 .setCallee(CC, RetTy, Callee, std::move(Args))
7439 .setDiscardResult(ShouldUseSRet);
7440 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7441
7442 if (!ShouldUseSRet)
7443 return CallResult.first;
7444
7445 SDValue LoadSin =
7446 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7447
7448 // Address of cos field.
7449 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7450 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7451 SDValue LoadCos =
7452 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7453
7454 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7455 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7456 LoadSin.getValue(0), LoadCos.getValue(0));
7457}
7458
7459SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7460 bool Signed,
7461 SDValue &Chain) const {
7462 EVT VT = Op.getValueType();
7463 assert((VT == MVT::i32 || VT == MVT::i64) &&
7464 "unexpected type for custom lowering DIV");
7465 SDLoc dl(Op);
7466
7467 const auto &DL = DAG.getDataLayout();
7468 const auto &TLI = DAG.getTargetLoweringInfo();
7469
7470 const char *Name = nullptr;
7471 if (Signed)
7472 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7473 else
7474 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7475
7476 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7477
7478 ARMTargetLowering::ArgListTy Args;
7479
7480 for (auto AI : {1, 0}) {
7481 ArgListEntry Arg;
7482 Arg.Node = Op.getOperand(AI);
7483 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7484 Args.push_back(Arg);
7485 }
7486
7487 CallLoweringInfo CLI(DAG);
7488 CLI.setDebugLoc(dl)
7489 .setChain(Chain)
7490 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7491 ES, std::move(Args));
7492
7493 return LowerCallTo(CLI).first;
7494}
7495
7496SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7497 bool Signed) const {
7498 assert(Op.getValueType() == MVT::i32 &&
7499 "unexpected type for custom lowering DIV");
7500 SDLoc dl(Op);
7501
7502 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7503 DAG.getEntryNode(), Op.getOperand(1));
7504
7505 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7506}
7507
7508static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7509 SDLoc DL(N);
7510 SDValue Op = N->getOperand(1);
7511 if (N->getValueType(0) == MVT::i32)
7512 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7513 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7514 DAG.getConstant(0, DL, MVT::i32));
7515 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7516 DAG.getConstant(1, DL, MVT::i32));
7517 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7518 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7519}
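// [Editor's note, not part of the original source] For the i64 case the code
// above ORs the two 32-bit halves of the denominator: (Lo | Hi) == 0 exactly
// when the full 64-bit value is zero, so a single 32-bit WIN__DBZCHK covers
// the wide divide as well. Host-side sketch of the same property:
#include <cstdint>

static bool isZeroDenominator64(uint64_t V) {
  uint32_t Lo = static_cast<uint32_t>(V);        // ISD::EXTRACT_ELEMENT 0
  uint32_t Hi = static_cast<uint32_t>(V >> 32);  // ISD::EXTRACT_ELEMENT 1
  return (Lo | Hi) == 0;                         // equivalent to V == 0
}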
7520
7521void ARMTargetLowering::ExpandDIV_Windows(
7522 SDValue Op, SelectionDAG &DAG, bool Signed,
7523 SmallVectorImpl<SDValue> &Results) const {
7524 const auto &DL = DAG.getDataLayout();
7525 const auto &TLI = DAG.getTargetLoweringInfo();
7526
7527 assert(Op.getValueType() == MVT::i64 &&
7528 "unexpected type for custom lowering DIV");
7529 SDLoc dl(Op);
7530
7531 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7532
7533 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7534
7535 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7536 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7537 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7538 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7539
7540 Results.push_back(Lower);
7541 Results.push_back(Upper);
7542}
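// [Editor's note, not part of the original source] The i64 libcall result is
// handed back to the legalizer as two i32 values: the low half via TRUNCATE,
// the high half via SRL-by-32 followed by TRUNCATE. Scalar equivalent:
#include <cstdint>

static void splitResult64(uint64_t Result, uint32_t &Lower, uint32_t &Upper) {
  Lower = static_cast<uint32_t>(Result);         // ISD::TRUNCATE
  Upper = static_cast<uint32_t>(Result >> 32);   // ISD::SRL(32) + ISD::TRUNCATE
}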
7543
7544static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7545 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7546 // Acquire/Release load/store is not legal for targets without a dmb or
7547 // equivalent available.
7548 return SDValue();
7549
7550 // Monotonic load/store is legal for all targets.
7551 return Op;
7552}
7553
7554static void ReplaceREADCYCLECOUNTER(SDNode *N,
7555 SmallVectorImpl<SDValue> &Results,
7556 SelectionDAG &DAG,
7557 const ARMSubtarget *Subtarget) {
7558 SDLoc DL(N);
7559 // Under Power Management extensions, the cycle-count is:
7560 // mrc p15, #0, <Rt>, c9, c13, #0
7561 SDValue Ops[] = { N->getOperand(0), // Chain
7562 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7563 DAG.getConstant(15, DL, MVT::i32),
7564 DAG.getConstant(0, DL, MVT::i32),
7565 DAG.getConstant(9, DL, MVT::i32),
7566 DAG.getConstant(13, DL, MVT::i32),
7567 DAG.getConstant(0, DL, MVT::i32)
7568 };
7569
7570 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7571 DAG.getVTList(MVT::i32, MVT::Other), Ops);
7572 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7573 DAG.getConstant(0, DL, MVT::i32)));
7574 Results.push_back(Cycles32.getValue(1));
7575}
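// [Editor's sketch, not part of the original source] The intrinsic operands
// above (coprocessor 15, opc1 0, CRn 9, CRm 13, opc2 0) spell
//   mrc p15, #0, <Rt>, c9, c13, #0
// i.e. a read of the 32-bit PMU cycle counter (PMCCNTR); BUILD_PAIR with a
// zero constant then widens it to the i64 READCYCLECOUNTER result. Roughly:
#include <cstdint>

static uint64_t readCycleCounter32() {
  uint32_t Cycles32 = 0;
#if defined(__arm__)
  // Assumes the PMU cycle counter is enabled and readable from this mode.
  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(Cycles32));
#endif
  return static_cast<uint64_t>(Cycles32);  // high 32 bits are zero
}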
7576
7577static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7578 SDLoc dl(V.getNode());
7579 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7580 SDValue VHi = DAG.getAnyExtOrTrunc(
7581 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7582 dl, MVT::i32);
7583 SDValue RegClass =
7584 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7585 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7586 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7587 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7588 return SDValue(
7589 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7590}
7591
7592static void ReplaceCMP_SWAP_64Results(SDNode *N,
7593 SmallVectorImpl<SDValue> & Results,
7594 SelectionDAG &DAG) {
7595 assert(N->getValueType(0) == MVT::i64 &&
7596 "AtomicCmpSwap on types less than 64 should be legal");
7597 SDValue Ops[] = {N->getOperand(1),
7598 createGPRPairNode(DAG, N->getOperand(2)),
7599 createGPRPairNode(DAG, N->getOperand(3)),
7600 N->getOperand(0)};
7601 SDNode *CmpSwap = DAG.getMachineNode(
7602 ARM::CMP_SWAP_64, SDLoc(N),
7603 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7604
7605 MachineFunction &MF = DAG.getMachineFunction();
7606 MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7607 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7608 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7609
7610 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
7611 SDValue(CmpSwap, 0)));
7612 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
7613 SDValue(CmpSwap, 0)));
7614 Results.push_back(SDValue(CmpSwap, 2));
7615}
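// [Editor's sketch, not part of the original source] CMP_SWAP_64 consumes the
// address, the expected value and the new value (the latter two packed into
// GPR pairs above) and produces the previous memory contents, which are split
// back out through gsub_0/gsub_1. Its semantics are a 64-bit compare-and-swap;
// the sketch below shows the data flow only and is not atomic:
#include <cstdint>

static uint64_t cmpSwap64Sketch(uint64_t *Addr, uint64_t Expected,
                                uint64_t Desired) {
  uint64_t Old = *Addr;      // returned in the GPRPair result (gsub_0/gsub_1)
  if (Old == Expected)
    *Addr = Desired;         // store only when the comparison succeeds
  return Old;
}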
7616
7617static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7618 SelectionDAG &DAG) {
7619 const auto &TLI = DAG.getTargetLoweringInfo();
7620
7621 assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7622 "Custom lowering is MSVCRT specific!");
7623
7624 SDLoc dl(Op);
7625 SDValue Val = Op.getOperand(0);
7626 MVT Ty = Val->getSimpleValueType(0);
7627 SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7628 SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7629 TLI.getPointerTy(DAG.getDataLayout()));
7630
7631 TargetLowering::ArgListTy Args;
7632 TargetLowering::ArgListEntry Entry;
7633
7634 Entry.Node = Val;
7635 Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7636 Entry.IsZExt = true;
7637 Args.push_back(Entry);
7638
7639 Entry.Node = Exponent;
7640 Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7641 Entry.IsZExt = true;
7642 Args.push_back(Entry);
7643
7644 Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
7645
7646 // The in-chain to the call is the entry node. If we are emitting a
7647 // tail call, the chain will be mutated if the node has a non-entry input
7648 // chain.
7649 SDValue InChain = DAG.getEntryNode();
7650 SDValue TCChain = InChain;
7651
7652 const auto *F = DAG.getMachineFunction().getFunction();
7653 bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
7654 F->getReturnType() == LCRTy;
7655 if (IsTC)
7656 InChain = TCChain;
7657
7658 TargetLowering::CallLoweringInfo CLI(DAG);
7659 CLI.setDebugLoc(dl)
7660 .setChain(InChain)
7661 .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
7662 .setTailCall(IsTC);
7663 std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
7664
7665 // Return the chain (the DAG root) if it is a tail call
7666 return !CI.second.getNode() ? DAG.getRoot() : CI.first;
7667}
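// [Editor's sketch, not part of the original source] On MSVCRT targets FPOWI
// is lowered above to a plain pow/powf call, with the i32 exponent converted
// through SINT_TO_FP first. Scalar equivalent of the f64 case:
#include <cmath>

static double powiViaPow(double X, int N) {
  return std::pow(X, static_cast<double>(N));  // SINT_TO_FP + pow libcall
}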
7668
7669SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7670 switch (Op.getOpcode()) {
7671 default: llvm_unreachable("Don't know how to custom lower this!");
7672 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
7673 case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
7674 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7675 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
7676 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7677 case ISD::SELECT: return LowerSELECT(Op, DAG);
7678 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7679 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
7680 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
7681 case ISD::VASTART: return LowerVASTART(Op, DAG);
7682 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7683 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
7684 case ISD::SINT_TO_FP:
7685 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7686 case ISD::FP_TO_SINT:
7687 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
7688 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
7689 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7690 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7691 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
7692 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
7693 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
7694 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
7695 Subtarget);
7696 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
7697 case ISD::SHL:
7698 case ISD::SRL:
7699 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
7700 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
7701 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
7702 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
7703 case ISD::SRL_PARTS:
7704 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
7705 case ISD::CTTZ:
7706 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
7707 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
7708 case ISD::SETCC: return LowerVSETCC(Op, DAG);
7709 case ISD::SETCCE: return LowerSETCCE(Op, DAG);
7710 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
7711 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
7712 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
7713 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
7714 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7715 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
7716 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
7717 case ISD::MUL: return LowerMUL(Op, DAG);
7718 case ISD::SDIV:
7719 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7720 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
7721 return LowerSDIV(Op, DAG);
7722 case ISD::UDIV:
7723 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7724 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
7725 return LowerUDIV(Op, DAG);
7726 case ISD::ADDC:
7727 case ISD::ADDE:
7728 case ISD::SUBC:
7729 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
7730 case ISD::SADDO:
7731 case ISD::UADDO:
7732 case ISD::SSUBO:
7733 case ISD::USUBO:
7734 return LowerXALUO(Op, DAG);
7735 case ISD::ATOMIC_LOAD:
7736 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
7737 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
7738 case<