Bug Summary

File: lib/Target/ARM/ARMISelLowering.cpp
Warning: line 245, column 20
Excessive padding in 'struct (anonymous at /build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/ARM/ARMISelLowering.cpp:245:20)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, Cond, consider reordering the fields or adding explicit padding members
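
For context, a minimal sketch of the layout problem, assuming a 64-bit (LP64) build where pointers are 8 bytes and both enums occupy 4 bytes (the struct and field names below mirror the anonymous struct at line 245; the exact sizes hold only under those assumptions):

    #include <cstdint>

    // Stand-ins for RTLIB::Libcall and ISD::CondCode; assumed to occupy
    // 4 bytes each, like typical unscoped enums on an LP64 target.
    enum Libcall : int32_t { ADD_F32 };
    enum CondCode : int32_t { SETCC_INVALID };

    // Field order as written at ARMISelLowering.cpp:245:
    // 4 (Op) + 4 (pad) + 8 (Name) + 4 (Cond) + 4 (tail pad) = 24 bytes,
    // of which 8 bytes are padding.
    struct AsWritten {
      Libcall Op;
      const char *Name;
      CondCode Cond;
    };

    // Order suggested by the analyzer (Name, Op, Cond):
    // 8 (Name) + 4 (Op) + 4 (Cond) = 16 bytes, no padding.
    struct Reordered {
      const char *Name;
      Libcall Op;
      CondCode Cond;
    };

    static_assert(sizeof(AsWritten) == 24, "assumes 8-byte pointers, 4-byte enums");
    static_assert(sizeof(Reordered) == 16, "reordering removes the 8 padding bytes");

Every entry of the LibraryCalls table flagged below carries those 8 wasted bytes; putting the pointer first removes them, though each brace initializer in the table would then need its elements reordered to match.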

Annotated Source Code

1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARMISelLowering.h"
16#include "ARMBaseInstrInfo.h"
17#include "ARMBaseRegisterInfo.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSelectionDAGInfo.h"
24#include "ARMSubtarget.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/StringSwitch.h"
40#include "llvm/ADT/Triple.h"
41#include "llvm/ADT/Twine.h"
42#include "llvm/Analysis/VectorUtils.h"
43#include "llvm/CodeGen/CallingConvLower.h"
44#include "llvm/CodeGen/ISDOpcodes.h"
45#include "llvm/CodeGen/IntrinsicLowering.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineConstantPool.h"
48#include "llvm/CodeGen/MachineFrameInfo.h"
49#include "llvm/CodeGen/MachineFunction.h"
50#include "llvm/CodeGen/MachineInstr.h"
51#include "llvm/CodeGen/MachineInstrBuilder.h"
52#include "llvm/CodeGen/MachineJumpTableInfo.h"
53#include "llvm/CodeGen/MachineMemOperand.h"
54#include "llvm/CodeGen/MachineOperand.h"
55#include "llvm/CodeGen/MachineRegisterInfo.h"
56#include "llvm/CodeGen/MachineValueType.h"
57#include "llvm/CodeGen/RuntimeLibcalls.h"
58#include "llvm/CodeGen/SelectionDAG.h"
59#include "llvm/CodeGen/SelectionDAGNodes.h"
60#include "llvm/CodeGen/TargetInstrInfo.h"
61#include "llvm/CodeGen/TargetLowering.h"
62#include "llvm/CodeGen/TargetOpcodes.h"
63#include "llvm/CodeGen/TargetRegisterInfo.h"
64#include "llvm/CodeGen/TargetSubtargetInfo.h"
65#include "llvm/CodeGen/ValueTypes.h"
66#include "llvm/IR/Attributes.h"
67#include "llvm/IR/CallingConv.h"
68#include "llvm/IR/Constant.h"
69#include "llvm/IR/Constants.h"
70#include "llvm/IR/DataLayout.h"
71#include "llvm/IR/DebugLoc.h"
72#include "llvm/IR/DerivedTypes.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/GlobalValue.h"
76#include "llvm/IR/GlobalVariable.h"
77#include "llvm/IR/IRBuilder.h"
78#include "llvm/IR/InlineAsm.h"
79#include "llvm/IR/Instruction.h"
80#include "llvm/IR/Instructions.h"
81#include "llvm/IR/IntrinsicInst.h"
82#include "llvm/IR/Intrinsics.h"
83#include "llvm/IR/Module.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/MC/MCInstrDesc.h"
88#include "llvm/MC/MCInstrItineraries.h"
89#include "llvm/MC/MCRegisterInfo.h"
90#include "llvm/MC/MCSchedule.h"
91#include "llvm/Support/AtomicOrdering.h"
92#include "llvm/Support/BranchProbability.h"
93#include "llvm/Support/Casting.h"
94#include "llvm/Support/CodeGen.h"
95#include "llvm/Support/CommandLine.h"
96#include "llvm/Support/Compiler.h"
97#include "llvm/Support/Debug.h"
98#include "llvm/Support/ErrorHandling.h"
99#include "llvm/Support/KnownBits.h"
100#include "llvm/Support/MathExtras.h"
101#include "llvm/Support/raw_ostream.h"
102#include "llvm/Target/TargetMachine.h"
103#include "llvm/Target/TargetOptions.h"
104#include <algorithm>
105#include <cassert>
106#include <cstdint>
107#include <cstdlib>
108#include <iterator>
109#include <limits>
110#include <string>
111#include <tuple>
112#include <utility>
113#include <vector>
114
115using namespace llvm;
116
117#define DEBUG_TYPE "arm-isel"
118
119STATISTIC(NumTailCalls, "Number of tail calls");
120STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122STATISTIC(NumConstpoolPromoted,
123          "Number of constants with their storage promoted into constant pools");
124
125static cl::opt<bool>
126ARMInterworking("arm-interworking", cl::Hidden,
127 cl::desc("Enable / disable ARM interworking (for debugging only)"),
128 cl::init(true));
129
130static cl::opt<bool> EnableConstpoolPromotion(
131 "arm-promote-constant", cl::Hidden,
132 cl::desc("Enable / disable promotion of unnamed_addr constants into "
133 "constant pools"),
134 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
135static cl::opt<unsigned> ConstpoolPromotionMaxSize(
136 "arm-promote-constant-max-size", cl::Hidden,
137 cl::desc("Maximum size of constant to promote into a constant pool"),
138 cl::init(64));
139static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
140 "arm-promote-constant-max-total", cl::Hidden,
141 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142 cl::init(128));
143
144// The APCS parameter registers.
145static const MCPhysReg GPRArgRegs[] = {
146 ARM::R0, ARM::R1, ARM::R2, ARM::R3
147};
148
149void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150 MVT PromotedBitwiseVT) {
151 if (VT != PromotedLdStVT) {
152 setOperationAction(ISD::LOAD, VT, Promote);
153 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154
155 setOperationAction(ISD::STORE, VT, Promote);
156 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157 }
158
159 MVT ElemTy = VT.getVectorElementType();
160 if (ElemTy != MVT::f64)
161 setOperationAction(ISD::SETCC, VT, Custom);
162 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
163 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
164 if (ElemTy == MVT::i32) {
165 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
166 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
167 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
168 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
169 } else {
170 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
171 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
172 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
173 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
174 }
175 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
176 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
177 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
178 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
179 setOperationAction(ISD::SELECT, VT, Expand);
180 setOperationAction(ISD::SELECT_CC, VT, Expand);
181 setOperationAction(ISD::VSELECT, VT, Expand);
182 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
183 if (VT.isInteger()) {
184 setOperationAction(ISD::SHL, VT, Custom);
185 setOperationAction(ISD::SRA, VT, Custom);
186 setOperationAction(ISD::SRL, VT, Custom);
187 }
188
189 // Promote all bit-wise operations.
190 if (VT.isInteger() && VT != PromotedBitwiseVT) {
191 setOperationAction(ISD::AND, VT, Promote);
192 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
193 setOperationAction(ISD::OR, VT, Promote);
194 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
195 setOperationAction(ISD::XOR, VT, Promote);
196 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197 }
198
199 // Neon does not support vector divide/remainder operations.
200 setOperationAction(ISD::SDIV, VT, Expand);
201 setOperationAction(ISD::UDIV, VT, Expand);
202 setOperationAction(ISD::FDIV, VT, Expand);
203 setOperationAction(ISD::SREM, VT, Expand);
204 setOperationAction(ISD::UREM, VT, Expand);
205 setOperationAction(ISD::FREM, VT, Expand);
206
207 if (!VT.isFloatingPoint() &&
208 VT != MVT::v2i64 && VT != MVT::v1i64)
209 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210 setOperationAction(Opcode, VT, Legal);
211}
212
213void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214 addRegisterClass(VT, &ARM::DPRRegClass);
215 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216}
217
218void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219 addRegisterClass(VT, &ARM::DPairRegClass);
220 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221}
222
223ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
224 const ARMSubtarget &STI)
225 : TargetLowering(TM), Subtarget(&STI) {
226 RegInfo = Subtarget->getRegisterInfo();
227 Itins = Subtarget->getInstrItineraryData();
228
229 setBooleanContents(ZeroOrOneBooleanContent);
230 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
231
232 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233 !Subtarget->isTargetWatchOS()) {
234 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
235 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
236 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
237 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
238 : CallingConv::ARM_AAPCS);
239 }
240
241 if (Subtarget->isTargetMachO()) {
242 // Uses VFP for Thumb libfuncs if available.
243 if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
244 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
245 static const struct {
Excessive padding in 'struct (anonymous at /build/llvm-toolchain-snapshot-6.0~svn320940/lib/Target/ARM/ARMISelLowering.cpp:245:20)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, Cond, consider reordering the fields or adding explicit padding members
246 const RTLIB::Libcall Op;
247 const char * const Name;
248 const ISD::CondCode Cond;
249 } LibraryCalls[] = {
250 // Single-precision floating-point arithmetic.
251 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
252 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
253 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
254 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
255
256 // Double-precision floating-point arithmetic.
257 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
258 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
259 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
260 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
261
262 // Single-precision comparisons.
263 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
264 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
265 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
266 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
267 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
268 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
269 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
270 { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
271
272 // Double-precision comparisons.
273 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
274 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
275 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
276 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
277 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
278 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
279 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
280 { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
281
282 // Floating-point to integer conversions.
283 // i64 conversions are done via library routines even when generating VFP
284 // instructions, so use the same ones.
285 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
286 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
287 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
288 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
289
290 // Conversions between floating types.
291 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
292 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
293
294 // Integer to floating-point conversions.
295 // i64 conversions are done via library routines even when generating VFP
296 // instructions, so use the same ones.
297 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
298 // e.g., __floatunsidf vs. __floatunssidfvfp.
299 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
300 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
301 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
302 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
303 };
304
305 for (const auto &LC : LibraryCalls) {
306 setLibcallName(LC.Op, LC.Name);
307 if (LC.Cond != ISD::SETCC_INVALID)
308 setCmpLibcallCC(LC.Op, LC.Cond);
309 }
310 }
311
312 // Set the correct calling convention for ARMv7k WatchOS. It's just
313 // AAPCS_VFP for functions as simple as libcalls.
314 if (Subtarget->isTargetWatchABI()) {
315 for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
316 setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
317 }
318 }
319
320 // These libcalls are not available in 32-bit.
321 setLibcallName(RTLIB::SHL_I128, nullptr);
322 setLibcallName(RTLIB::SRL_I128, nullptr);
323 setLibcallName(RTLIB::SRA_I128, nullptr);
324
325 // RTLIB
326 if (Subtarget->isAAPCS_ABI() &&
327 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
328 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
329 static const struct {
330 const RTLIB::Libcall Op;
331 const char * const Name;
332 const CallingConv::ID CC;
333 const ISD::CondCode Cond;
334 } LibraryCalls[] = {
335 // Double-precision floating-point arithmetic helper functions
336 // RTABI chapter 4.1.2, Table 2
337 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
338 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
339 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341
342 // Double-precision floating-point comparison helper functions
343 // RTABI chapter 4.1.2, Table 3
344 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
345 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
346 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
347 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
348 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
349 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
350 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
351 { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
352
353 // Single-precision floating-point arithmetic helper functions
354 // RTABI chapter 4.1.2, Table 4
355 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
356 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
357 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359
360 // Single-precision floating-point comparison helper functions
361 // RTABI chapter 4.1.2, Table 5
362 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
363 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
364 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
365 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
366 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
367 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
368 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
369 { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
370
371 // Floating-point to integer conversions.
372 // RTABI chapter 4.1.2, Table 6
373 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
375 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381
382 // Conversions between floating types.
383 // RTABI chapter 4.1.2, Table 7
384 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387
388 // Integer to floating-point conversions.
389 // RTABI chapter 4.1.2, Table 8
390 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
392 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398
399 // Long long helper functions
400 // RTABI chapter 4.2, Table 9
401 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405
406 // Integer division functions
407 // RTABI chapter 4.3.1
408 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
410 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416 };
417
418 for (const auto &LC : LibraryCalls) {
419 setLibcallName(LC.Op, LC.Name);
420 setLibcallCallingConv(LC.Op, LC.CC);
421 if (LC.Cond != ISD::SETCC_INVALID)
422 setCmpLibcallCC(LC.Op, LC.Cond);
423 }
424
425 // EABI dependent RTLIB
426 if (TM.Options.EABIVersion == EABI::EABI4 ||
427 TM.Options.EABIVersion == EABI::EABI5) {
428 static const struct {
429 const RTLIB::Libcall Op;
430 const char *const Name;
431 const CallingConv::ID CC;
432 const ISD::CondCode Cond;
433 } MemOpsLibraryCalls[] = {
434 // Memory operations
435 // RTABI chapter 4.3.4
436 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
437 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
438 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439 };
440
441 for (const auto &LC : MemOpsLibraryCalls) {
442 setLibcallName(LC.Op, LC.Name);
443 setLibcallCallingConv(LC.Op, LC.CC);
444 if (LC.Cond != ISD::SETCC_INVALID)
445 setCmpLibcallCC(LC.Op, LC.Cond);
446 }
447 }
448 }
449
450 if (Subtarget->isTargetWindows()) {
451 static const struct {
452 const RTLIB::Libcall Op;
453 const char * const Name;
454 const CallingConv::ID CC;
455 } LibraryCalls[] = {
456 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
457 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
458 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
459 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
460 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
461 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
462 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
463 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
464 };
465
466 for (const auto &LC : LibraryCalls) {
467 setLibcallName(LC.Op, LC.Name);
468 setLibcallCallingConv(LC.Op, LC.CC);
469 }
470 }
471
472 // Use divmod compiler-rt calls for iOS 5.0 and later.
473 if (Subtarget->isTargetMachO() &&
474 !(Subtarget->isTargetIOS() &&
475 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
476 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
477 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
478 }
479
480 // The half <-> float conversion functions are always soft-float on
481 // non-watchos platforms, but are needed for some targets which use a
482 // hard-float calling convention by default.
483 if (!Subtarget->isTargetWatchABI()) {
484 if (Subtarget->isAAPCS_ABI()) {
485 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
486 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
487 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
488 } else {
489 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
490 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
491 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
492 }
493 }
494
495 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
496 // a __gnu_ prefix (which is the default).
497 if (Subtarget->isTargetAEABI()) {
498 static const struct {
499 const RTLIB::Libcall Op;
500 const char * const Name;
501 const CallingConv::ID CC;
502 } LibraryCalls[] = {
503 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
504 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
505 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
506 };
507
508 for (const auto &LC : LibraryCalls) {
509 setLibcallName(LC.Op, LC.Name);
510 setLibcallCallingConv(LC.Op, LC.CC);
511 }
512 }
513
514 if (Subtarget->isThumb1Only())
515 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
516 else
517 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
518
519 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
520 !Subtarget->isThumb1Only()) {
521 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
522 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
523 }
524
525 for (MVT VT : MVT::vector_valuetypes()) {
526 for (MVT InnerVT : MVT::vector_valuetypes()) {
527 setTruncStoreAction(VT, InnerVT, Expand);
528 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
529 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
530 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
531 }
532
533 setOperationAction(ISD::MULHS, VT, Expand);
534 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
535 setOperationAction(ISD::MULHU, VT, Expand);
536 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
537
538 setOperationAction(ISD::BSWAP, VT, Expand);
539 }
540
541 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
542 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
543
544 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
545 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
546
547 if (Subtarget->hasNEON()) {
548 addDRTypeForNEON(MVT::v2f32);
549 addDRTypeForNEON(MVT::v8i8);
550 addDRTypeForNEON(MVT::v4i16);
551 addDRTypeForNEON(MVT::v2i32);
552 addDRTypeForNEON(MVT::v1i64);
553
554 addQRTypeForNEON(MVT::v4f32);
555 addQRTypeForNEON(MVT::v2f64);
556 addQRTypeForNEON(MVT::v16i8);
557 addQRTypeForNEON(MVT::v8i16);
558 addQRTypeForNEON(MVT::v4i32);
559 addQRTypeForNEON(MVT::v2i64);
560
561 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
562 // neither Neon nor VFP support any arithmetic operations on it.
563 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
564 // supported for v4f32.
565 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
566 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
567 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
568 // FIXME: Code duplication: FDIV and FREM are expanded always, see
569 // ARMTargetLowering::addTypeForNEON method for details.
570 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
571 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
572 // FIXME: Create unittest.
573 // In other words, find a case where "copysign" appears in a DAG with vector
574 // operands.
575 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
576 // FIXME: Code duplication: SETCC has custom operation action, see
577 // ARMTargetLowering::addTypeForNEON method for details.
578 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
579 // FIXME: Create unittest for FNEG and for FABS.
580 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
581 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
582 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
583 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
584 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
585 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
586 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
587 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
588 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
589 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
590 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
591 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
592 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
593 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
594 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
595 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
596 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
597 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
598
599 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
600 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
601 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
602 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
603 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
604 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
605 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
606 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
607 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
608 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
609 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
610 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
611 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
612 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
613
614 // Mark v2f32 intrinsics.
615 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
616 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
617 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
618 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
619 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
620 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
621 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
622 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
623 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
624 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
625 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
626 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
627 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
628 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
629
630 // Neon does not support some operations on v1i64 and v2i64 types.
631 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
632 // Custom handling for some quad-vector types to detect VMULL.
633 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
634 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
635 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
636 // Custom handling for some vector types to avoid expensive expansions
637 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
638 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
639 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
640 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
641 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
642 // a destination type that is wider than the source, nor does
643 // it have a FP_TO_[SU]INT instruction with a narrower destination than
644 // source.
645 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
646 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
647 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
648 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
649
650 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
651 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
652
653 // NEON does not have single instruction CTPOP for vectors with element
654 // types wider than 8-bits. However, custom lowering can leverage the
655 // v8i8/v16i8 vcnt instruction.
656 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
657 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
658 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
659 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
660 setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
661 setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
662
663 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
664 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
665
666 // NEON does not have single instruction CTTZ for vectors.
667 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
668 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
669 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
670 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
671
672 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
673 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
674 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
675 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
676
677 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
678 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
679 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
680 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
681
682 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
683 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
684 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
685 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
686
687 // NEON only has FMA instructions as of VFP4.
688 if (!Subtarget->hasVFP4()) {
689 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
690 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
691 }
692
693 setTargetDAGCombine(ISD::INTRINSIC_VOID);
694 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
695 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
696 setTargetDAGCombine(ISD::SHL);
697 setTargetDAGCombine(ISD::SRL);
698 setTargetDAGCombine(ISD::SRA);
699 setTargetDAGCombine(ISD::SIGN_EXTEND);
700 setTargetDAGCombine(ISD::ZERO_EXTEND);
701 setTargetDAGCombine(ISD::ANY_EXTEND);
702 setTargetDAGCombine(ISD::BUILD_VECTOR);
703 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
704 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
705 setTargetDAGCombine(ISD::STORE);
706 setTargetDAGCombine(ISD::FP_TO_SINT);
707 setTargetDAGCombine(ISD::FP_TO_UINT);
708 setTargetDAGCombine(ISD::FDIV);
709 setTargetDAGCombine(ISD::LOAD);
710
711 // It is legal to extload from v4i8 to v4i16 or v4i32.
712 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
713 MVT::v2i32}) {
714 for (MVT VT : MVT::integer_vector_valuetypes()) {
715 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
716 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
717 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
718 }
719 }
720 }
721
722 if (Subtarget->isFPOnlySP()) {
723 // When targeting a floating-point unit with only single-precision
724 // operations, f64 is legal for the few double-precision instructions which
725 // are present. However, no double-precision operations other than moves,
726 // loads and stores are provided by the hardware.
727 setOperationAction(ISD::FADD, MVT::f64, Expand);
728 setOperationAction(ISD::FSUB, MVT::f64, Expand);
729 setOperationAction(ISD::FMUL, MVT::f64, Expand);
730 setOperationAction(ISD::FMA, MVT::f64, Expand);
731 setOperationAction(ISD::FDIV, MVT::f64, Expand);
732 setOperationAction(ISD::FREM, MVT::f64, Expand);
733 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
734 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
735 setOperationAction(ISD::FNEG, MVT::f64, Expand);
736 setOperationAction(ISD::FABS, MVT::f64, Expand);
737 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
738 setOperationAction(ISD::FSIN, MVT::f64, Expand);
739 setOperationAction(ISD::FCOS, MVT::f64, Expand);
740 setOperationAction(ISD::FPOW, MVT::f64, Expand);
741 setOperationAction(ISD::FLOG, MVT::f64, Expand);
742 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
743 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
744 setOperationAction(ISD::FEXP, MVT::f64, Expand);
745 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
746 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
747 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
748 setOperationAction(ISD::FRINT, MVT::f64, Expand);
749 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
750 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
751 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
752 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
753 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
754 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
755 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
756 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
757 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
758 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
759 }
760
761 computeRegisterProperties(Subtarget->getRegisterInfo());
762
763 // ARM does not have floating-point extending loads.
764 for (MVT VT : MVT::fp_valuetypes()) {
765 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
766 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
767 }
768
769 // ... or truncating stores
770 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
771 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
772 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
773
774 // ARM does not have i1 sign extending load.
775 for (MVT VT : MVT::integer_valuetypes())
776 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
777
778 // ARM supports all 4 flavors of integer indexed load / store.
779 if (!Subtarget->isThumb1Only()) {
780 for (unsigned im = (unsigned)ISD::PRE_INC;
781 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
782 setIndexedLoadAction(im, MVT::i1, Legal);
783 setIndexedLoadAction(im, MVT::i8, Legal);
784 setIndexedLoadAction(im, MVT::i16, Legal);
785 setIndexedLoadAction(im, MVT::i32, Legal);
786 setIndexedStoreAction(im, MVT::i1, Legal);
787 setIndexedStoreAction(im, MVT::i8, Legal);
788 setIndexedStoreAction(im, MVT::i16, Legal);
789 setIndexedStoreAction(im, MVT::i32, Legal);
790 }
791 } else {
792 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
793 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
794 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
795 }
796
797 setOperationAction(ISD::SADDO, MVT::i32, Custom);
798 setOperationAction(ISD::UADDO, MVT::i32, Custom);
799 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
800 setOperationAction(ISD::USUBO, MVT::i32, Custom);
801
802 setOperationAction(ISD::ADDCARRY, MVT::i32, Custom);
803 setOperationAction(ISD::SUBCARRY, MVT::i32, Custom);
804
805 // i64 operation support.
806 setOperationAction(ISD::MUL, MVT::i64, Expand);
807 setOperationAction(ISD::MULHU, MVT::i32, Expand);
808 if (Subtarget->isThumb1Only()) {
809 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
810 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
811 }
812 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
813 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
814 setOperationAction(ISD::MULHS, MVT::i32, Expand);
815
816 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
817 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
818 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
819 setOperationAction(ISD::SRL, MVT::i64, Custom);
820 setOperationAction(ISD::SRA, MVT::i64, Custom);
821 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
822
823 setOperationAction(ISD::ADDC, MVT::i32, Custom);
824 setOperationAction(ISD::ADDE, MVT::i32, Custom);
825 setOperationAction(ISD::SUBC, MVT::i32, Custom);
826 setOperationAction(ISD::SUBE, MVT::i32, Custom);
827
828 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
829 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
830
831 // ARM does not have ROTL.
832 setOperationAction(ISD::ROTL, MVT::i32, Expand);
833 for (MVT VT : MVT::vector_valuetypes()) {
834 setOperationAction(ISD::ROTL, VT, Expand);
835 setOperationAction(ISD::ROTR, VT, Expand);
836 }
837 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
838 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
839 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
840 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
841
842 // @llvm.readcyclecounter requires the Performance Monitors extension.
843 // Default to the 0 expansion on unsupported platforms.
844 // FIXME: Technically there are older ARM CPUs that have
845 // implementation-specific ways of obtaining this information.
846 if (Subtarget->hasPerfMon())
847 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
848
849 // Only ARMv6 has BSWAP.
850 if (!Subtarget->hasV6Ops())
851 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
852
853 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
854 : Subtarget->hasDivideInARMMode();
855 if (!hasDivide) {
856 // These are expanded into libcalls if the CPU doesn't have a HW divider.
857 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
858 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
859 }
860
861 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
862 setOperationAction(ISD::SDIV, MVT::i32, Custom);
863 setOperationAction(ISD::UDIV, MVT::i32, Custom);
864
865 setOperationAction(ISD::SDIV, MVT::i64, Custom);
866 setOperationAction(ISD::UDIV, MVT::i64, Custom);
867 }
868
869 setOperationAction(ISD::SREM, MVT::i32, Expand);
870 setOperationAction(ISD::UREM, MVT::i32, Expand);
871
872 // Register based DivRem for AEABI (RTABI 4.2)
873 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
874 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
875 Subtarget->isTargetWindows()) {
876 setOperationAction(ISD::SREM, MVT::i64, Custom);
877 setOperationAction(ISD::UREM, MVT::i64, Custom);
878 HasStandaloneRem = false;
879
880 if (Subtarget->isTargetWindows()) {
881 const struct {
882 const RTLIB::Libcall Op;
883 const char * const Name;
884 const CallingConv::ID CC;
885 } LibraryCalls[] = {
886 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
887 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
888 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
889 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
890
891 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
892 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
893 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
894 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
895 };
896
897 for (const auto &LC : LibraryCalls) {
898 setLibcallName(LC.Op, LC.Name);
899 setLibcallCallingConv(LC.Op, LC.CC);
900 }
901 } else {
902 const struct {
903 const RTLIB::Libcall Op;
904 const char * const Name;
905 const CallingConv::ID CC;
906 } LibraryCalls[] = {
907 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
908 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
909 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
910 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
911
912 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
913 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
914 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
915 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
916 };
917
918 for (const auto &LC : LibraryCalls) {
919 setLibcallName(LC.Op, LC.Name);
920 setLibcallCallingConv(LC.Op, LC.CC);
921 }
922 }
923
924 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
925 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
926 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
927 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
928 } else {
929 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
930 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
931 }
932
933 if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
934 for (auto &VT : {MVT::f32, MVT::f64})
935 setOperationAction(ISD::FPOWI, VT, Custom);
936
937 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
938 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
939 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
940 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
941
942 setOperationAction(ISD::TRAP, MVT::Other, Legal);
943
944 // Use the default implementation.
945 setOperationAction(ISD::VASTART, MVT::Other, Custom);
946 setOperationAction(ISD::VAARG, MVT::Other, Expand);
947 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
948 setOperationAction(ISD::VAEND, MVT::Other, Expand);
949 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
950 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
951
952 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
953 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
954 else
955 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
956
957 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
958 // the default expansion.
959 InsertFencesForAtomic = false;
960 if (Subtarget->hasAnyDataBarrier() &&
961 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
962 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
963 // to ldrex/strex loops already.
964 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
965 if (!Subtarget->isThumb() || !Subtarget->isMClass())
966 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
967
968 // On v8, we have particularly efficient implementations of atomic fences
969 // if they can be combined with nearby atomic loads and stores.
970 if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
971 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
972 InsertFencesForAtomic = true;
973 }
974 } else {
975 // If there's anything we can use as a barrier, go through custom lowering
976 // for ATOMIC_FENCE.
977 // If the target has DMB in Thumb mode, fences can be inserted.
978 if (Subtarget->hasDataBarrier())
979 InsertFencesForAtomic = true;
980
981 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
982 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
983
984 // Set them all for expansion, which will force libcalls.
985 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
986 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
987 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
988 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
989 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
990 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
991 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
992 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
993 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
994 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
995 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
996 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
997 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
998 // Unordered/Monotonic case.
999 if (!InsertFencesForAtomic) {
1000 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1001 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1002 }
1003 }
1004
1005 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1006
1007 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1008 if (!Subtarget->hasV6Ops()) {
1009 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1010 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1011 }
1012 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1013
1014 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1015 !Subtarget->isThumb1Only()) {
1016 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1017 // iff target supports vfp2.
1018 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1019 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1020 }
1021
1022 // We want to custom lower some of our intrinsics.
1023 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1024 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1025 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1026 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1027 if (Subtarget->useSjLjEH())
1028 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1029
1030 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1031 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1032 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1033 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1034 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1035 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1036 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1037 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1038 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1039
1040 // Thumb-1 cannot currently select ARMISD::SUBE.
1041 if (!Subtarget->isThumb1Only())
1042 setOperationAction(ISD::SETCCE, MVT::i32, Custom);
1043
1044 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
1045 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1046 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1047 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1048 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1049
1050 // We don't support sin/cos/fmod/copysign/pow
1051 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1052 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1053 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1054 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1055 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1056 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1057 setOperationAction(ISD::FREM, MVT::f64, Expand);
1058 setOperationAction(ISD::FREM, MVT::f32, Expand);
1059 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1060 !Subtarget->isThumb1Only()) {
1061 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1062 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1063 }
1064 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1065 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1066
1067 if (!Subtarget->hasVFP4()) {
1068 setOperationAction(ISD::FMA, MVT::f64, Expand);
1069 setOperationAction(ISD::FMA, MVT::f32, Expand);
1070 }
1071
1072 // Various VFP goodness
1073 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1074 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1075 if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1076 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1077 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1078 }
1079
1080 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1081 if (!Subtarget->hasFP16()) {
1082 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1083 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1084 }
1085 }
1086
1087 // Combine sin / cos into one node or libcall if possible.
1088 if (Subtarget->hasSinCos()) {
1089 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1090 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1091 if (Subtarget->isTargetWatchABI()) {
1092 setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
1093 setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
1094 }
1095 if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1096 // For iOS, we don't want the normal expansion of a libcall to
1097 // sincos. We want to issue a libcall to __sincos_stret.
1098 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1099 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1100 }
1101 }
1102
1103 // FP-ARMv8 implements a lot of rounding-like FP operations.
1104 if (Subtarget->hasFPARMv8()) {
1105 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1106 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1107 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1108 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1109 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1110 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1111 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1112 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1113 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1114 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1115 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1116 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1117
1118 if (!Subtarget->isFPOnlySP()) {
1119 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1120 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1121 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1122 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1123 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1124 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1125 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1126 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1127 }
1128 }
1129
1130 if (Subtarget->hasNEON()) {
1131 // vmin and vmax aren't available in a scalar form, so we use
1132 // a NEON instruction with an undef lane instead.
1133 setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
1134 setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
1135 setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
1136 setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
1137 setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
1138 setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
1139 }
1140
1141 // We have target-specific dag combine patterns for the following nodes:
1142 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1143 setTargetDAGCombine(ISD::ADD);
1144 setTargetDAGCombine(ISD::SUB);
1145 setTargetDAGCombine(ISD::MUL);
1146 setTargetDAGCombine(ISD::AND);
1147 setTargetDAGCombine(ISD::OR);
1148 setTargetDAGCombine(ISD::XOR);
1149
1150 if (Subtarget->hasV6Ops())
1151 setTargetDAGCombine(ISD::SRL);
1152
1153 setStackPointerRegisterToSaveRestore(ARM::SP);
1154
1155 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1156 !Subtarget->hasVFP2())
1157 setSchedulingPreference(Sched::RegPressure);
1158 else
1159 setSchedulingPreference(Sched::Hybrid);
1160
1161 //// temporary - rewrite interface to use type
1162 MaxStoresPerMemset = 8;
1163 MaxStoresPerMemsetOptSize = 4;
1164 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1165 MaxStoresPerMemcpyOptSize = 2;
1166 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1167 MaxStoresPerMemmoveOptSize = 2;
1168
1169 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1170 // are at least 4 bytes aligned.
1171 setMinStackArgumentAlignment(4);
1172
1173 // Prefer likely predicted branches to selects on out-of-order cores.
1174 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1175
1176 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1177}
1178
1179bool ARMTargetLowering::useSoftFloat() const {
1180 return Subtarget->useSoftFloat();
1181}
1182
1183// FIXME: It might make sense to define the representative register class as the
1184// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1185// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1186// SPR's representative would be DPR_VFP2. This should work well if register
1187// pressure tracking were modified such that a register use would increment the
1188// pressure of the register class's representative and all of its super
1189// classes' representatives transitively. We have not implemented this because
1190// of the difficulty prior to coalescing of modeling operand register classes
1191// due to the common occurrence of cross class copies and subregister insertions
1192// and extractions.
1193std::pair<const TargetRegisterClass *, uint8_t>
1194ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1195 MVT VT) const {
1196 const TargetRegisterClass *RRC = nullptr;
1197 uint8_t Cost = 1;
1198 switch (VT.SimpleTy) {
1199 default:
1200 return TargetLowering::findRepresentativeClass(TRI, VT);
1201 // Use DPR as representative register class for all floating point
1202// and vector types. Since there are 32 SPR registers and 32 DPR registers,
1203 // the cost is 1 for both f32 and f64.
1204 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1205 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1206 RRC = &ARM::DPRRegClass;
1207 // When NEON is used for SP, only half of the register file is available
1208 // because operations that define both SP and DP results will be constrained
1209 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1210 // coalescing by double-counting the SP regs. See the FIXME above.
1211 if (Subtarget->useNEONForSinglePrecisionFP())
1212 Cost = 2;
1213 break;
1214 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1215 case MVT::v4f32: case MVT::v2f64:
1216 RRC = &ARM::DPRRegClass;
1217 Cost = 2;
1218 break;
1219 case MVT::v4i64:
1220 RRC = &ARM::DPRRegClass;
1221 Cost = 4;
1222 break;
1223 case MVT::v8i64:
1224 RRC = &ARM::DPRRegClass;
1225 Cost = 8;
1226 break;
1227 }
1228 return std::make_pair(RRC, Cost);
1229}
1230
1231const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1232 switch ((ARMISD::NodeType)Opcode) {
1233 case ARMISD::FIRST_NUMBER: break;
1234 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1235 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1236 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1237 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1238 case ARMISD::CALL: return "ARMISD::CALL";
1239 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1240 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1241 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1242 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1243 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1244 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1245 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1246 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1247 case ARMISD::CMP: return "ARMISD::CMP";
1248 case ARMISD::CMN: return "ARMISD::CMN";
1249 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1250 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1251 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1252 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1253 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1254
1255 case ARMISD::CMOV: return "ARMISD::CMOV";
1256
1257 case ARMISD::SSAT: return "ARMISD::SSAT";
1258
1259 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1260 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1261 case ARMISD::RRX: return "ARMISD::RRX";
1262
1263 case ARMISD::ADDC: return "ARMISD::ADDC";
1264 case ARMISD::ADDE: return "ARMISD::ADDE";
1265 case ARMISD::SUBC: return "ARMISD::SUBC";
1266 case ARMISD::SUBE: return "ARMISD::SUBE";
1267
1268 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1269 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1270
1271 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1272 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1273 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1274
1275 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1276
1277 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1278
1279 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1280
1281 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1282
1283 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1284
1285 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1286 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1287
1288 case ARMISD::VCEQ: return "ARMISD::VCEQ";
1289 case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1290 case ARMISD::VCGE: return "ARMISD::VCGE";
1291 case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1292 case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1293 case ARMISD::VCGEU: return "ARMISD::VCGEU";
1294 case ARMISD::VCGT: return "ARMISD::VCGT";
1295 case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1296 case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1297 case ARMISD::VCGTU: return "ARMISD::VCGTU";
1298 case ARMISD::VTST: return "ARMISD::VTST";
1299
1300 case ARMISD::VSHL: return "ARMISD::VSHL";
1301 case ARMISD::VSHRs: return "ARMISD::VSHRs";
1302 case ARMISD::VSHRu: return "ARMISD::VSHRu";
1303 case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1304 case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1305 case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1306 case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1307 case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1308 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1309 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1310 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1311 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1312 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1313 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1314 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1315 case ARMISD::VSLI: return "ARMISD::VSLI";
1316 case ARMISD::VSRI: return "ARMISD::VSRI";
1317 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1318 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1319 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1320 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1321 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1322 case ARMISD::VDUP: return "ARMISD::VDUP";
1323 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1324 case ARMISD::VEXT: return "ARMISD::VEXT";
1325 case ARMISD::VREV64: return "ARMISD::VREV64";
1326 case ARMISD::VREV32: return "ARMISD::VREV32";
1327 case ARMISD::VREV16: return "ARMISD::VREV16";
1328 case ARMISD::VZIP: return "ARMISD::VZIP";
1329 case ARMISD::VUZP: return "ARMISD::VUZP";
1330 case ARMISD::VTRN: return "ARMISD::VTRN";
1331 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1332 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1333 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1334 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1335 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1336 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1337 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1338 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1339 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1340 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1341 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1342 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1343 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1344 case ARMISD::SMLALD: return "ARMISD::SMLALD";
1345 case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1346 case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1347 case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1348 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1349 case ARMISD::BFI: return "ARMISD::BFI";
1350 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1351 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1352 case ARMISD::VBSL: return "ARMISD::VBSL";
1353 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1354 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1355 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1356 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1357 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1358 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1359 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1360 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1361 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1362 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1363 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1364 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1365 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1366 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1367 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1368 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1369 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1370 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1371 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1372 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1373 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1374 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1375 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1376 }
1377 return nullptr;
1378}
1379
1380EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1381 EVT VT) const {
1382 if (!VT.isVector())
1383 return getPointerTy(DL);
1384 return VT.changeVectorElementTypeToInteger();
1385}
1386
1387/// getRegClassFor - Return the register class that should be used for the
1388/// specified value type.
1389const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1390 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1391 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1392 // load / store 4 to 8 consecutive D registers.
1393 if (Subtarget->hasNEON()) {
1394 if (VT == MVT::v4i64)
1395 return &ARM::QQPRRegClass;
1396 if (VT == MVT::v8i64)
1397 return &ARM::QQQQPRRegClass;
1398 }
1399 return TargetLowering::getRegClassFor(VT);
1400}
1401
1402// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1403// source/dest is aligned and the copy size is large enough. We therefore want
1404// to align such objects passed to memory intrinsics.
1405bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1406 unsigned &PrefAlign) const {
1407 if (!isa<MemIntrinsic>(CI))
1408 return false;
1409 MinSize = 8;
1410 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1411 // cycle faster than 4-byte aligned LDM.
1412 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1413 return true;
1414}
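// Illustrative sketch of the effect (assumed caller behaviour, not shown in
// this file): for a memcpy call on an ARM11-class core (v6 ops, not M-class)
// the hook above reports MinSize = 8 and PrefAlign = 8, so a source or
// destination object of at least 8 bytes may have its alignment raised to 8
// to enable the faster 8-byte aligned LDM/STM sequence; on M-class cores
// PrefAlign stays at 4 and no extra realignment is requested.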
1415
1416// Create a fast isel object.
1417FastISel *
1418ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1419 const TargetLibraryInfo *libInfo) const {
1420 return ARM::createFastISel(funcInfo, libInfo);
1421}
1422
1423Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1424 unsigned NumVals = N->getNumValues();
1425 if (!NumVals)
1426 return Sched::RegPressure;
1427
1428 for (unsigned i = 0; i != NumVals; ++i) {
1429 EVT VT = N->getValueType(i);
1430 if (VT == MVT::Glue || VT == MVT::Other)
1431 continue;
1432 if (VT.isFloatingPoint() || VT.isVector())
1433 return Sched::ILP;
1434 }
1435
1436 if (!N->isMachineOpcode())
1437 return Sched::RegPressure;
1438
1439 // Loads are scheduled for latency even if the instruction itinerary
1440 // is not available.
1441 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1442 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1443
1444 if (MCID.getNumDefs() == 0)
1445 return Sched::RegPressure;
1446 if (!Itins->isEmpty() &&
1447 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1448 return Sched::ILP;
1449
1450 return Sched::RegPressure;
1451}
1452
1453//===----------------------------------------------------------------------===//
1454// Lowering Code
1455//===----------------------------------------------------------------------===//
1456
1457static bool isSRL16(const SDValue &Op) {
1458 if (Op.getOpcode() != ISD::SRL)
1459 return false;
1460 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1461 return Const->getZExtValue() == 16;
1462 return false;
1463}
1464
1465static bool isSRA16(const SDValue &Op) {
1466 if (Op.getOpcode() != ISD::SRA)
1467 return false;
1468 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1469 return Const->getZExtValue() == 16;
1470 return false;
1471}
1472
1473static bool isSHL16(const SDValue &Op) {
1474 if (Op.getOpcode() != ISD::SHL)
1475 return false;
1476 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1477 return Const->getZExtValue() == 16;
1478 return false;
1479}
1480
1481// Check for a signed 16-bit value. We special case SRA because it keeps things
1482// simpler when also looking for SRAs that aren't sign extending a
1483// smaller value. Without the check, we'd need to take extra care with
1484// checking order for some operations.
1485static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1486 if (isSRA16(Op))
1487 return isSHL16(Op.getOperand(0));
1488 return DAG.ComputeNumSignBits(Op) == 17;
1489}
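// Worked example for the check above (values chosen for illustration only):
// on an i32, ComputeNumSignBits(Op) == 17 means bits [31:15] are all copies
// of the sign bit, i.e. the value is a sign-extension of a 16-bit quantity.
// 0xFFFF8000 (-32768) has exactly 17 sign bits and passes, while 0xFFFF7FFF
// (-32769) has only 16 and fails. Values produced by the shl/sra idiom are
// matched separately by the isSRA16/isSHL16 case first.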
1490
1491/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1492static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1493 switch (CC) {
1494 default: llvm_unreachable("Unknown condition code!");
1495 case ISD::SETNE: return ARMCC::NE;
1496 case ISD::SETEQ: return ARMCC::EQ;
1497 case ISD::SETGT: return ARMCC::GT;
1498 case ISD::SETGE: return ARMCC::GE;
1499 case ISD::SETLT: return ARMCC::LT;
1500 case ISD::SETLE: return ARMCC::LE;
1501 case ISD::SETUGT: return ARMCC::HI;
1502 case ISD::SETUGE: return ARMCC::HS;
1503 case ISD::SETULT: return ARMCC::LO;
1504 case ISD::SETULE: return ARMCC::LS;
1505 }
1506}
1507
1508/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1509static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1510 ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1511 CondCode2 = ARMCC::AL;
1512 InvalidOnQNaN = true;
1513 switch (CC) {
1514 default: llvm_unreachable("Unknown FP condition!");
1515 case ISD::SETEQ:
1516 case ISD::SETOEQ:
1517 CondCode = ARMCC::EQ;
1518 InvalidOnQNaN = false;
1519 break;
1520 case ISD::SETGT:
1521 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1522 case ISD::SETGE:
1523 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1524 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1525 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1526 case ISD::SETONE:
1527 CondCode = ARMCC::MI;
1528 CondCode2 = ARMCC::GT;
1529 InvalidOnQNaN = false;
1530 break;
1531 case ISD::SETO: CondCode = ARMCC::VC; break;
1532 case ISD::SETUO: CondCode = ARMCC::VS; break;
1533 case ISD::SETUEQ:
1534 CondCode = ARMCC::EQ;
1535 CondCode2 = ARMCC::VS;
1536 InvalidOnQNaN = false;
1537 break;
1538 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1539 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1540 case ISD::SETLT:
1541 case ISD::SETULT: CondCode = ARMCC::LT; break;
1542 case ISD::SETLE:
1543 case ISD::SETULE: CondCode = ARMCC::LE; break;
1544 case ISD::SETNE:
1545 case ISD::SETUNE:
1546 CondCode = ARMCC::NE;
1547 InvalidOnQNaN = false;
1548 break;
1549 }
1550}
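// Illustrative example of the two-condition case above: an unordered-or-equal
// comparison (ISD::SETUEQ) has no single ARM condition, so it is lowered to
// CondCode = EQ with CondCode2 = VS, and callers typically emit one
// instruction predicated on EQ and a second predicated on VS. Ordered
// comparisons such as SETOGT map to a single condition (GT) and leave
// CondCode2 as AL.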
1551
1552//===----------------------------------------------------------------------===//
1553// Calling Convention Implementation
1554//===----------------------------------------------------------------------===//
1555
1556#include "ARMGenCallingConv.inc"
1557
1558/// getEffectiveCallingConv - Get the effective calling convention, taking into
1559/// account presence of floating point hardware and calling convention
1560/// limitations, such as support for variadic functions.
1561CallingConv::ID
1562ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1563 bool isVarArg) const {
1564 switch (CC) {
1565 default:
1566 report_fatal_error("Unsupported calling convention");
1567 case CallingConv::ARM_AAPCS:
1568 case CallingConv::ARM_APCS:
1569 case CallingConv::GHC:
1570 return CC;
1571 case CallingConv::PreserveMost:
1572 return CallingConv::PreserveMost;
1573 case CallingConv::ARM_AAPCS_VFP:
1574 case CallingConv::Swift:
1575 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1576 case CallingConv::C:
1577 if (!Subtarget->isAAPCS_ABI())
1578 return CallingConv::ARM_APCS;
1579 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1580 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1581 !isVarArg)
1582 return CallingConv::ARM_AAPCS_VFP;
1583 else
1584 return CallingConv::ARM_AAPCS;
1585 case CallingConv::Fast:
1586 case CallingConv::CXX_FAST_TLS:
1587 if (!Subtarget->isAAPCS_ABI()) {
1588 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1589 return CallingConv::Fast;
1590 return CallingConv::ARM_APCS;
1591 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1592 return CallingConv::ARM_AAPCS_VFP;
1593 else
1594 return CallingConv::ARM_AAPCS;
1595 }
1596}
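// Illustrative sketch of the selection above (assumed target settings, not a
// specific triple): for CallingConv::C on an AAPCS target built with a
// hard-float ABI, VFP2 and not Thumb1-only, a non-variadic call resolves to
// ARM_AAPCS_VFP, while the same call made variadic falls back to ARM_AAPCS,
// since variadic calls must use the base AAPCS argument marshalling.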
1597
1598CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1599 bool isVarArg) const {
1600 return CCAssignFnForNode(CC, false, isVarArg);
1601}
1602
1603CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1604 bool isVarArg) const {
1605 return CCAssignFnForNode(CC, true, isVarArg);
1606}
1607
1608/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1609/// CallingConvention.
1610CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1611 bool Return,
1612 bool isVarArg) const {
1613 switch (getEffectiveCallingConv(CC, isVarArg)) {
1614 default:
1615 report_fatal_error("Unsupported calling convention");
1616 case CallingConv::ARM_APCS:
1617 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1618 case CallingConv::ARM_AAPCS:
1619 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1620 case CallingConv::ARM_AAPCS_VFP:
1621 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1622 case CallingConv::Fast:
1623 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1624 case CallingConv::GHC:
1625 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1626 case CallingConv::PreserveMost:
1627 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1628 }
1629}
1630
1631/// LowerCallResult - Lower the result values of a call into the
1632/// appropriate copies out of appropriate physical registers.
1633SDValue ARMTargetLowering::LowerCallResult(
1634 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1635 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1636 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1637 SDValue ThisVal) const {
1638 // Assign locations to each value returned by this call.
1639 SmallVector<CCValAssign, 16> RVLocs;
1640 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1641 *DAG.getContext());
1642 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1643
1644 // Copy all of the result registers out of their specified physreg.
1645 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1646 CCValAssign VA = RVLocs[i];
1647
1648 // Pass 'this' value directly from the argument to return value, to avoid
1649 // reg unit interference
1650 if (i == 0 && isThisReturn) {
1651 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1652        "unexpected return calling convention register assignment");
1653 InVals.push_back(ThisVal);
1654 continue;
1655 }
1656
1657 SDValue Val;
1658 if (VA.needsCustom()) {
1659 // Handle f64 or half of a v2f64.
1660 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1661 InFlag);
1662 Chain = Lo.getValue(1);
1663 InFlag = Lo.getValue(2);
1664 VA = RVLocs[++i]; // skip ahead to next loc
1665 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1666 InFlag);
1667 Chain = Hi.getValue(1);
1668 InFlag = Hi.getValue(2);
1669 if (!Subtarget->isLittle())
1670 std::swap (Lo, Hi);
1671 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1672
1673 if (VA.getLocVT() == MVT::v2f64) {
1674 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1675 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1676 DAG.getConstant(0, dl, MVT::i32));
1677
1678 VA = RVLocs[++i]; // skip ahead to next loc
1679 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1680 Chain = Lo.getValue(1);
1681 InFlag = Lo.getValue(2);
1682 VA = RVLocs[++i]; // skip ahead to next loc
1683 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1684 Chain = Hi.getValue(1);
1685 InFlag = Hi.getValue(2);
1686 if (!Subtarget->isLittle())
1687 std::swap (Lo, Hi);
1688 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1689 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1690 DAG.getConstant(1, dl, MVT::i32));
1691 }
1692 } else {
1693 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1694 InFlag);
1695 Chain = Val.getValue(1);
1696 InFlag = Val.getValue(2);
1697 }
1698
1699 switch (VA.getLocInfo()) {
1700 default: llvm_unreachable("Unknown loc info!");
1701 case CCValAssign::Full: break;
1702 case CCValAssign::BCvt:
1703 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1704 break;
1705 }
1706
1707 InVals.push_back(Val);
1708 }
1709
1710 return Chain;
1711}
1712
1713/// LowerMemOpCallTo - Store the argument to the stack.
1714SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1715 SDValue Arg, const SDLoc &dl,
1716 SelectionDAG &DAG,
1717 const CCValAssign &VA,
1718 ISD::ArgFlagsTy Flags) const {
1719 unsigned LocMemOffset = VA.getLocMemOffset();
1720 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1721 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1722 StackPtr, PtrOff);
1723 return DAG.getStore(
1724 Chain, dl, Arg, PtrOff,
1725 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1726}
1727
1728void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1729 SDValue Chain, SDValue &Arg,
1730 RegsToPassVector &RegsToPass,
1731 CCValAssign &VA, CCValAssign &NextVA,
1732 SDValue &StackPtr,
1733 SmallVectorImpl<SDValue> &MemOpChains,
1734 ISD::ArgFlagsTy Flags) const {
1735 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1736 DAG.getVTList(MVT::i32, MVT::i32), Arg);
1737 unsigned id = Subtarget->isLittle() ? 0 : 1;
1738 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1739
1740 if (NextVA.isRegLoc())
1741 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1742 else {
1743 assert(NextVA.isMemLoc());
1744 if (!StackPtr.getNode())
1745 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1746 getPointerTy(DAG.getDataLayout()));
1747
1748 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1749 dl, DAG, NextVA,
1750 Flags));
1751 }
1752}
1753
1754/// LowerCall - Lowering a call into a callseq_start <-
1755/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1756/// nodes.
1757SDValue
1758ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1759 SmallVectorImpl<SDValue> &InVals) const {
1760 SelectionDAG &DAG = CLI.DAG;
1761 SDLoc &dl = CLI.DL;
1762 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1763 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1764 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1765 SDValue Chain = CLI.Chain;
1766 SDValue Callee = CLI.Callee;
1767 bool &isTailCall = CLI.IsTailCall;
1768 CallingConv::ID CallConv = CLI.CallConv;
1769 bool doesNotRet = CLI.DoesNotReturn;
1770 bool isVarArg = CLI.IsVarArg;
1771
1772 MachineFunction &MF = DAG.getMachineFunction();
1773 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1774 bool isThisReturn = false;
1775 bool isSibCall = false;
1776 auto Attr = MF.getFunction().getFnAttribute("disable-tail-calls");
1777
1778 // Disable tail calls if they're not supported.
1779 if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1780 isTailCall = false;
1781
1782 if (isTailCall) {
1783 // Check if it's really possible to do a tail call.
1784 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1785 isVarArg, isStructRet, MF.getFunction().hasStructRetAttr(),
1786 Outs, OutVals, Ins, DAG);
1787 if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1788 report_fatal_error("failed to perform tail call elimination on a call "
1789 "site marked musttail");
1790 // We don't support GuaranteedTailCallOpt for ARM, only automatically
1791 // detected sibcalls.
1792 if (isTailCall) {
1793 ++NumTailCalls;
1794 isSibCall = true;
1795 }
1796 }
1797
1798 // Analyze operands of the call, assigning locations to each operand.
1799 SmallVector<CCValAssign, 16> ArgLocs;
1800 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1801 *DAG.getContext());
1802 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1803
1804 // Get a count of how many bytes are to be pushed on the stack.
1805 unsigned NumBytes = CCInfo.getNextStackOffset();
1806
1807 // For tail calls, memory operands are available in our caller's stack.
1808 if (isSibCall)
1809 NumBytes = 0;
1810
1811 // Adjust the stack pointer for the new arguments...
1812 // These operations are automatically eliminated by the prolog/epilog pass
1813 if (!isSibCall)
1814 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1815
1816 SDValue StackPtr =
1817 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1818
1819 RegsToPassVector RegsToPass;
1820 SmallVector<SDValue, 8> MemOpChains;
1821
1822 // Walk the register/memloc assignments, inserting copies/loads. In the case
1823 // of tail call optimization, arguments are handled later.
1824 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1825 i != e;
1826 ++i, ++realArgIdx) {
1827 CCValAssign &VA = ArgLocs[i];
1828 SDValue Arg = OutVals[realArgIdx];
1829 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1830 bool isByVal = Flags.isByVal();
1831
1832 // Promote the value if needed.
1833 switch (VA.getLocInfo()) {
1834 default: llvm_unreachable("Unknown loc info!");
1835 case CCValAssign::Full: break;
1836 case CCValAssign::SExt:
1837 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1838 break;
1839 case CCValAssign::ZExt:
1840 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1841 break;
1842 case CCValAssign::AExt:
1843 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1844 break;
1845 case CCValAssign::BCvt:
1846 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1847 break;
1848 }
1849
1850 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1851 if (VA.needsCustom()) {
1852 if (VA.getLocVT() == MVT::v2f64) {
1853 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1854 DAG.getConstant(0, dl, MVT::i32));
1855 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1856 DAG.getConstant(1, dl, MVT::i32));
1857
1858 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1859 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1860
1861 VA = ArgLocs[++i]; // skip ahead to next loc
1862 if (VA.isRegLoc()) {
1863 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1864 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1865 } else {
1866 assert(VA.isMemLoc());
1867
1868 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1869 dl, DAG, VA, Flags));
1870 }
1871 } else {
1872 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1873 StackPtr, MemOpChains, Flags);
1874 }
1875 } else if (VA.isRegLoc()) {
1876 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1877 Outs[0].VT == MVT::i32) {
1878 assert(VA.getLocVT() == MVT::i32 &&
1879        "unexpected calling convention register assignment");
1880 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1881        "unexpected use of 'returned'");
1882 isThisReturn = true;
1883 }
1884 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1885 } else if (isByVal) {
1886 assert(VA.isMemLoc());
1887 unsigned offset = 0;
1888
1889 // True if this byval aggregate will be split between registers
1890 // and memory.
1891 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1892 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1893
1894 if (CurByValIdx < ByValArgsCount) {
1895
1896 unsigned RegBegin, RegEnd;
1897 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1898
1899 EVT PtrVT =
1900 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1901 unsigned int i, j;
1902 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1903 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1904 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1905 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1906 MachinePointerInfo(),
1907 DAG.InferPtrAlignment(AddArg));
1908 MemOpChains.push_back(Load.getValue(1));
1909 RegsToPass.push_back(std::make_pair(j, Load));
1910 }
1911
1912 // If the parameter size exceeds the register area, the "offset" value
1913 // helps us calculate the stack slot for the remaining part properly.
1914 offset = RegEnd - RegBegin;
1915
1916 CCInfo.nextInRegsParam();
1917 }
1918
1919 if (Flags.getByValSize() > 4*offset) {
1920 auto PtrVT = getPointerTy(DAG.getDataLayout());
1921 unsigned LocMemOffset = VA.getLocMemOffset();
1922 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1923 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1924 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1925 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1926 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1927 MVT::i32);
1928 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1929 MVT::i32);
1930
1931 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1932 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1933 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1934 Ops));
1935 }
1936 } else if (!isSibCall) {
1937 assert(VA.isMemLoc());
1938
1939 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1940 dl, DAG, VA, Flags));
1941 }
1942 }
1943
1944 if (!MemOpChains.empty())
1945 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1946
1947 // Build a sequence of copy-to-reg nodes chained together with token chain
1948 // and flag operands which copy the outgoing args into the appropriate regs.
1949 SDValue InFlag;
1950 // Tail call byval lowering might overwrite argument registers so in case of
1951 // tail call optimization the copies to registers are lowered later.
1952 if (!isTailCall)
1953 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1954 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1955 RegsToPass[i].second, InFlag);
1956 InFlag = Chain.getValue(1);
1957 }
1958
1959 // For tail calls lower the arguments to the 'real' stack slot.
1960 if (isTailCall) {
1961 // Force all the incoming stack arguments to be loaded from the stack
1962 // before any new outgoing arguments are stored to the stack, because the
1963 // outgoing stack slots may alias the incoming argument stack slots, and
1964 // the alias isn't otherwise explicit. This is slightly more conservative
1965 // than necessary, because it means that each store effectively depends
1966 // on every argument instead of just those arguments it would clobber.
1967
1968 // Do not flag preceding copytoreg stuff together with the following stuff.
1969 InFlag = SDValue();
1970 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1971 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1972 RegsToPass[i].second, InFlag);
1973 InFlag = Chain.getValue(1);
1974 }
1975 InFlag = SDValue();
1976 }
1977
1978 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1979 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1980 // node so that legalize doesn't hack it.
1981 bool isDirect = false;
1982
1983 const TargetMachine &TM = getTargetMachine();
1984 const Module *Mod = MF.getFunction().getParent();
1985 const GlobalValue *GV = nullptr;
1986 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1987 GV = G->getGlobal();
1988 bool isStub =
1989 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1990
1991 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1992 bool isLocalARMFunc = false;
1993 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1994 auto PtrVt = getPointerTy(DAG.getDataLayout());
1995
1996 if (Subtarget->genLongCalls()) {
1997 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1998        "long-calls codegen is not position independent!");
1999 // Handle a global address or an external symbol. If it's not one of
2000 // those, the target's already in a register, so we don't need to do
2001 // anything extra.
2002 if (isa<GlobalAddressSDNode>(Callee)) {
2003 // Create a constant pool entry for the callee address
2004 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2005 ARMConstantPoolValue *CPV =
2006 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2007
2008 // Get the address of the callee into a register
2009 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2010 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2011 Callee = DAG.getLoad(
2012 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2013 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2014 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2015 const char *Sym = S->getSymbol();
2016
2017 // Create a constant pool entry for the callee address
2018 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2019 ARMConstantPoolValue *CPV =
2020 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2021 ARMPCLabelIndex, 0);
2022 // Get the address of the callee into a register
2023 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2024 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2025 Callee = DAG.getLoad(
2026 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2027 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2028 }
2029 } else if (isa<GlobalAddressSDNode>(Callee)) {
2030 // If we're optimizing for minimum size and the function is called three or
2031 // more times in this block, we can improve codesize by calling indirectly
2032 // as BLXr has a 16-bit encoding.
2033 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2034 auto *BB = CLI.CS.getParent();
2035 bool PreferIndirect =
2036 Subtarget->isThumb() && MF.getFunction().optForMinSize() &&
2037 count_if(GV->users(), [&BB](const User *U) {
2038 return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2039 }) > 2;
2040
2041 if (!PreferIndirect) {
2042 isDirect = true;
2043 bool isDef = GV->isStrongDefinitionForLinker();
2044
2045 // ARM call to a local ARM function is predicable.
2046 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2047 // tBX takes a register source operand.
2048 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2049 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2050 Callee = DAG.getNode(
2051 ARMISD::WrapperPIC, dl, PtrVt,
2052 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2053 Callee = DAG.getLoad(
2054 PtrVt, dl, DAG.getEntryNode(), Callee,
2055 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2056 /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2057 MachineMemOperand::MOInvariant);
2058 } else if (Subtarget->isTargetCOFF()) {
2059 assert(Subtarget->isTargetWindows() &&
2060        "Windows is the only supported COFF target");
2061 unsigned TargetFlags = GV->hasDLLImportStorageClass()
2062 ? ARMII::MO_DLLIMPORT
2063 : ARMII::MO_NO_FLAG;
2064 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2065 TargetFlags);
2066 if (GV->hasDLLImportStorageClass())
2067 Callee =
2068 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2069 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2070 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2071 } else {
2072 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2073 }
2074 }
2075 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2076 isDirect = true;
2077 // tBX takes a register source operand.
2078 const char *Sym = S->getSymbol();
2079 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2080 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2081 ARMConstantPoolValue *CPV =
2082 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2083 ARMPCLabelIndex, 4);
2084 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2085 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2086 Callee = DAG.getLoad(
2087 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2088 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2089 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2090 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2091 } else {
2092 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2093 }
2094 }
2095
2096 // FIXME: handle tail calls differently.
2097 unsigned CallOpc;
2098 if (Subtarget->isThumb()) {
2099 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2100 CallOpc = ARMISD::CALL_NOLINK;
2101 else
2102 CallOpc = ARMISD::CALL;
2103 } else {
2104 if (!isDirect && !Subtarget->hasV5TOps())
2105 CallOpc = ARMISD::CALL_NOLINK;
2106 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2107 // Emit regular call when code size is the priority
2108 !MF.getFunction().optForMinSize())
2109 // "mov lr, pc; b _foo" to avoid confusing the RSP
2110 CallOpc = ARMISD::CALL_NOLINK;
2111 else
2112 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2113 }
2114
2115 std::vector<SDValue> Ops;
2116 Ops.push_back(Chain);
2117 Ops.push_back(Callee);
2118
2119 // Add argument registers to the end of the list so that they are known live
2120 // into the call.
2121 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2122 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2123 RegsToPass[i].second.getValueType()));
2124
2125 // Add a register mask operand representing the call-preserved registers.
2126 if (!isTailCall) {
2127 const uint32_t *Mask;
2128 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2129 if (isThisReturn) {
2130 // For 'this' returns, use the R0-preserving mask if applicable
2131 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2132 if (!Mask) {
2133 // Set isThisReturn to false if the calling convention is not one that
2134 // allows 'returned' to be modeled in this way, so LowerCallResult does
2135 // not try to pass 'this' straight through
2136 isThisReturn = false;
2137 Mask = ARI->getCallPreservedMask(MF, CallConv);
2138 }
2139 } else
2140 Mask = ARI->getCallPreservedMask(MF, CallConv);
2141
2142 assert(Mask && "Missing call preserved mask for calling convention");
2143 Ops.push_back(DAG.getRegisterMask(Mask));
2144 }
2145
2146 if (InFlag.getNode())
2147 Ops.push_back(InFlag);
2148
2149 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2150 if (isTailCall) {
2151 MF.getFrameInfo().setHasTailCall();
2152 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2153 }
2154
2155 // Returns a chain and a flag for retval copy to use.
2156 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2157 InFlag = Chain.getValue(1);
2158
2159 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2160 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2161 if (!Ins.empty())
2162 InFlag = Chain.getValue(1);
2163
2164 // Handle result values, copying them out of physregs into vregs that we
2165 // return.
2166 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2167 InVals, isThisReturn,
2168 isThisReturn ? OutVals[0] : SDValue());
2169}
2170
2171/// HandleByVal - Every parameter *after* a byval parameter is passed
2172/// on the stack. Remember the next parameter register to allocate,
2173/// and then confiscate the rest of the parameter registers to ensure
2174/// this.
2175void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2176 unsigned Align) const {
2177 // Byval (as with any stack) slots are always at least 4 byte aligned.
2178 Align = std::max(Align, 4U);
2179
2180 unsigned Reg = State->AllocateReg(GPRArgRegs);
2181 if (!Reg)
2182 return;
2183
2184 unsigned AlignInRegs = Align / 4;
2185 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2186 for (unsigned i = 0; i < Waste; ++i)
2187 Reg = State->AllocateReg(GPRArgRegs);
2188
2189 if (!Reg)
2190 return;
2191
2192 unsigned Excess = 4 * (ARM::R4 - Reg);
2193
2194 // Special case when NSAA != SP and the parameter size is greater than the
2195 // size of all remaining GPR regs. In that case we can't split the parameter;
2196 // we must send it to the stack. We must also set NCRN to R4, so waste all
2197 // remaining registers.
2198 const unsigned NSAAOffset = State->getNextStackOffset();
2199 if (NSAAOffset != 0 && Size > Excess) {
2200 while (State->AllocateReg(GPRArgRegs))
2201 ;
2202 return;
2203 }
2204
2205 // The first register for the byval parameter is the first register that
2206 // wasn't allocated before this method call, so it would be "reg".
2207 // If the parameter is small enough to be saved in the range [reg, r4), then
2208 // the end (one past the last) register would be reg + param-size-in-regs;
2209 // otherwise the parameter is split between registers and stack, and the
2210 // end register is r4 in that case.
2211 unsigned ByValRegBegin = Reg;
2212 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2213 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2214 // Note: the first register was already allocated at the beginning of the
2215 // function, so allocate the remaining registers we need.
2216 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2217 State->AllocateReg(GPRArgRegs);
2218 // A byval parameter that is split between registers and memory needs its
2219 // size truncated here.
2220 // In the case where the entire structure fits in registers, we set the
2221 // size in memory to zero.
2222 Size = std::max<int>(Size - Excess, 0);
2223}
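// Worked example of the accounting above (register numbers are illustrative
// and assume the GPR argument registers r0-r3 are numbered consecutively, as
// the code itself does): for a byval argument with Size = 24 and Align = 8
// where r1 is the next free register, AlignInRegs = 2, so r1 is wasted to
// reach the 8-byte boundary and Reg becomes r2; Excess = 4 * (r4 - r2) = 8,
// so r2 and r3 hold the first 8 bytes and Size is reduced to 16, the part
// that is passed on the stack.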
2224
2225/// MatchingStackOffset - Return true if the given stack call argument is
2226/// already available in the same position (relatively) of the caller's
2227/// incoming argument stack.
2228static
2229bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2230 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2231 const TargetInstrInfo *TII) {
2232 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2233 int FI = std::numeric_limits<int>::max();
2234 if (Arg.getOpcode() == ISD::CopyFromReg) {
2235 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2236 if (!TargetRegisterInfo::isVirtualRegister(VR))
2237 return false;
2238 MachineInstr *Def = MRI->getVRegDef(VR);
2239 if (!Def)
2240 return false;
2241 if (!Flags.isByVal()) {
2242 if (!TII->isLoadFromStackSlot(*Def, FI))
2243 return false;
2244 } else {
2245 return false;
2246 }
2247 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2248 if (Flags.isByVal())
2249 // ByVal argument is passed in as a pointer but it's now being
2250 // dereferenced. e.g.
2251 // define @foo(%struct.X* %A) {
2252 // tail call @bar(%struct.X* byval %A)
2253 // }
2254 return false;
2255 SDValue Ptr = Ld->getBasePtr();
2256 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2257 if (!FINode)
2258 return false;
2259 FI = FINode->getIndex();
2260 } else
2261 return false;
2262
2263 assert(FI != std::numeric_limits<int>::max());
2264 if (!MFI.isFixedObjectIndex(FI))
2265 return false;
2266 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2267}
2268
2269/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2270/// for tail call optimization. Targets which want to do tail call
2271/// optimization should implement this function.
2272bool
2273ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2274 CallingConv::ID CalleeCC,
2275 bool isVarArg,
2276 bool isCalleeStructRet,
2277 bool isCallerStructRet,
2278 const SmallVectorImpl<ISD::OutputArg> &Outs,
2279 const SmallVectorImpl<SDValue> &OutVals,
2280 const SmallVectorImpl<ISD::InputArg> &Ins,
2281 SelectionDAG& DAG) const {
2282 MachineFunction &MF = DAG.getMachineFunction();
2283 const Function &CallerF = MF.getFunction();
2284 CallingConv::ID CallerCC = CallerF.getCallingConv();
2285
2286 assert(Subtarget->supportsTailCall());
2287
2288 // Tail calls to function pointers cannot be optimized for Thumb1 if the args
2289 // to the call take up r0-r3. The reason is that there are no legal registers
2290 // left to hold the pointer to the function to be called.
2291 if (Subtarget->isThumb1Only() && Outs.size() >= 4 &&
2292 !isa<GlobalAddressSDNode>(Callee.getNode()))
2293 return false;
2294
2295 // Look for obvious safe cases to perform tail call optimization that do not
2296 // require ABI changes. This is what gcc calls sibcall.
2297
2298 // Exception-handling functions need a special set of instructions to indicate
2299 // a return to the hardware. Tail-calling another function would probably
2300 // break this.
2301 if (CallerF.hasFnAttribute("interrupt"))
2302 return false;
2303
2304 // Also avoid sibcall optimization if either caller or callee uses struct
2305 // return semantics.
2306 if (isCalleeStructRet || isCallerStructRet)
2307 return false;
2308
2309 // Externally-defined functions with weak linkage should not be
2310 // tail-called on ARM when the OS does not support dynamic
2311 // pre-emption of symbols, as the AAELF spec requires normal calls
2312 // to undefined weak functions to be replaced with a NOP or jump to the
2313 // next instruction. The behaviour of branch instructions in this
2314 // situation (as used for tail calls) is implementation-defined, so we
2315 // cannot rely on the linker replacing the tail call with a return.
2316 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2317 const GlobalValue *GV = G->getGlobal();
2318 const Triple &TT = getTargetMachine().getTargetTriple();
2319 if (GV->hasExternalWeakLinkage() &&
2320 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2321 return false;
2322 }
2323
2324 // Check that the call results are passed in the same way.
2325 LLVMContext &C = *DAG.getContext();
2326 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2327 CCAssignFnForReturn(CalleeCC, isVarArg),
2328 CCAssignFnForReturn(CallerCC, isVarArg)))
2329 return false;
2330 // The callee has to preserve all registers the caller needs to preserve.
2331 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2332 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2333 if (CalleeCC != CallerCC) {
2334 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2335 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2336 return false;
2337 }
2338
2339 // If Caller's vararg or byval argument has been split between registers and
2340 // stack, do not perform tail call, since part of the argument is in caller's
2341 // local frame.
2342 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2343 if (AFI_Caller->getArgRegsSaveSize())
2344 return false;
2345
2346 // If the callee takes no arguments then go on to check the results of the
2347 // call.
2348 if (!Outs.empty()) {
2349 // Check if stack adjustment is needed. For now, do not do this if any
2350 // argument is passed on the stack.
2351 SmallVector<CCValAssign, 16> ArgLocs;
2352 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2353 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2354 if (CCInfo.getNextStackOffset()) {
2355 // Check if the arguments are already laid out in the right way as
2356 // the caller's fixed stack objects.
2357 MachineFrameInfo &MFI = MF.getFrameInfo();
2358 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2359 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2360 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2361 i != e;
2362 ++i, ++realArgIdx) {
2363 CCValAssign &VA = ArgLocs[i];
2364 EVT RegVT = VA.getLocVT();
2365 SDValue Arg = OutVals[realArgIdx];
2366 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2367 if (VA.getLocInfo() == CCValAssign::Indirect)
2368 return false;
2369 if (VA.needsCustom()) {
2370 // f64 and vector types are split into multiple registers or
2371 // register/stack-slot combinations. The types will not match
2372 // the registers; give up on memory f64 refs until we figure
2373 // out what to do about this.
2374 if (!VA.isRegLoc())
2375 return false;
2376 if (!ArgLocs[++i].isRegLoc())
2377 return false;
2378 if (RegVT == MVT::v2f64) {
2379 if (!ArgLocs[++i].isRegLoc())
2380 return false;
2381 if (!ArgLocs[++i].isRegLoc())
2382 return false;
2383 }
2384 } else if (!VA.isRegLoc()) {
2385 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2386 MFI, MRI, TII))
2387 return false;
2388 }
2389 }
2390 }
2391
2392 const MachineRegisterInfo &MRI = MF.getRegInfo();
2393 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2394 return false;
2395 }
2396
2397 return true;
2398}
2399
2400bool
2401ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2402 MachineFunction &MF, bool isVarArg,
2403 const SmallVectorImpl<ISD::OutputArg> &Outs,
2404 LLVMContext &Context) const {
2405 SmallVector<CCValAssign, 16> RVLocs;
2406 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2407 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2408}
2409
2410static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2411 const SDLoc &DL, SelectionDAG &DAG) {
2412 const MachineFunction &MF = DAG.getMachineFunction();
2413 const Function &F = MF.getFunction();
2414
2415 StringRef IntKind = F.getFnAttribute("interrupt").getValueAsString();
2416
2417 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2418 // version of the "preferred return address". These offsets affect the return
2419 // instruction if this is a return from PL1 without hypervisor extensions.
2420 // IRQ/FIQ: +4 "subs pc, lr, #4"
2421 // SWI: 0 "subs pc, lr, #0"
2422 // ABORT: +4 "subs pc, lr, #4"
2423 // UNDEF: +4/+2 "subs pc, lr, #0"
2424 // UNDEF varies depending on whether the exception came from ARM or Thumb
2425 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2426
2427 int64_t LROffset;
2428 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2429 IntKind == "ABORT")
2430 LROffset = 4;
2431 else if (IntKind == "SWI" || IntKind == "UNDEF")
2432 LROffset = 0;
2433 else
2434 report_fatal_error("Unsupported interrupt attribute. If present, value "
2435 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2436
2437 RetOps.insert(RetOps.begin() + 1,
2438 DAG.getConstant(LROffset, DL, MVT::i32, false));
2439
2440 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2441}
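// Illustrative example (hypothetical handler, not part of this file): a
// function carrying the IR attribute interrupt="IRQ", e.g. one written in C
// as __attribute__((interrupt("IRQ"))), gets LROffset = 4 here, so the
// return is emitted as "subs pc, lr, #4"; a handler marked "SWI" or "UNDEF"
// gets LROffset = 0 and returns with "subs pc, lr, #0".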
2442
2443SDValue
2444ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2445 bool isVarArg,
2446 const SmallVectorImpl<ISD::OutputArg> &Outs,
2447 const SmallVectorImpl<SDValue> &OutVals,
2448 const SDLoc &dl, SelectionDAG &DAG) const {
2449 // CCValAssign - represent the assignment of the return value to a location.
2450 SmallVector<CCValAssign, 16> RVLocs;
2451
2452 // CCState - Info about the registers and stack slots.
2453 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2454 *DAG.getContext());
2455
2456 // Analyze outgoing return values.
2457 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2458
2459 SDValue Flag;
2460 SmallVector<SDValue, 4> RetOps;
2461 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2462 bool isLittleEndian = Subtarget->isLittle();
2463
2464 MachineFunction &MF = DAG.getMachineFunction();
2465 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2466 AFI->setReturnRegsCount(RVLocs.size());
2467
2468 // Copy the result values into the output registers.
2469 for (unsigned i = 0, realRVLocIdx = 0;
2470 i != RVLocs.size();
2471 ++i, ++realRVLocIdx) {
2472 CCValAssign &VA = RVLocs[i];
2473 assert(VA.isRegLoc() && "Can only return in registers!");
2474
2475 SDValue Arg = OutVals[realRVLocIdx];
2476
2477 switch (VA.getLocInfo()) {
2478 default: llvm_unreachable("Unknown loc info!");
2479 case CCValAssign::Full: break;
2480 case CCValAssign::BCvt:
2481 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2482 break;
2483 }
2484
2485 if (VA.needsCustom()) {
2486 if (VA.getLocVT() == MVT::v2f64) {
2487 // Extract the first half and return it in two registers.
2488 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2489 DAG.getConstant(0, dl, MVT::i32));
2490 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2491 DAG.getVTList(MVT::i32, MVT::i32), Half);
2492
2493 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2494 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2495 Flag);
2496 Flag = Chain.getValue(1);
2497 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2498 VA = RVLocs[++i]; // skip ahead to next loc
2499 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2500 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2501 Flag);
2502 Flag = Chain.getValue(1);
2503 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2504 VA = RVLocs[++i]; // skip ahead to next loc
2505
2506 // Extract the 2nd half and fall through to handle it as an f64 value.
2507 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2508 DAG.getConstant(1, dl, MVT::i32));
2509 }
2510 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2511 // available.
2512 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2513 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2514 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2515 fmrrd.getValue(isLittleEndian ? 0 : 1),
2516 Flag);
2517 Flag = Chain.getValue(1);
2518 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2519 VA = RVLocs[++i]; // skip ahead to next loc
2520 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2521 fmrrd.getValue(isLittleEndian ? 1 : 0),
2522 Flag);
2523 } else
2524 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2525
2526    // Guarantee that all emitted copies are glued
2527    // together, so they cannot be scheduled apart.
2528 Flag = Chain.getValue(1);
2529 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2530 }
2531 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2532 const MCPhysReg *I =
2533 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2534 if (I) {
2535 for (; *I; ++I) {
2536 if (ARM::GPRRegClass.contains(*I))
2537 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2538 else if (ARM::DPRRegClass.contains(*I))
2539 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2540 else
2541        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2542 }
2543 }
2544
2545 // Update chain and glue.
2546 RetOps[0] = Chain;
2547 if (Flag.getNode())
2548 RetOps.push_back(Flag);
2549
2550 // CPUs which aren't M-class use a special sequence to return from
2551 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2552 // though we use "subs pc, lr, #N").
2553 //
2554 // M-class CPUs actually use a normal return sequence with a special
2555 // (hardware-provided) value in LR, so the normal code path works.
2556 if (DAG.getMachineFunction().getFunction().hasFnAttribute("interrupt") &&
2557 !Subtarget->isMClass()) {
2558 if (Subtarget->isThumb1Only())
2559 report_fatal_error("interrupt attribute is not supported in Thumb1");
2560 return LowerInterruptReturn(RetOps, dl, DAG);
2561 }
2562
2563 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2564}
2565
2566bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2567 if (N->getNumValues() != 1)
2568 return false;
2569 if (!N->hasNUsesOfValue(1, 0))
2570 return false;
2571
2572 SDValue TCChain = Chain;
2573 SDNode *Copy = *N->use_begin();
2574 if (Copy->getOpcode() == ISD::CopyToReg) {
2575 // If the copy has a glue operand, we conservatively assume it isn't safe to
2576 // perform a tail call.
2577 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2578 return false;
2579 TCChain = Copy->getOperand(0);
2580 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2581 SDNode *VMov = Copy;
2582 // f64 returned in a pair of GPRs.
2583 SmallPtrSet<SDNode*, 2> Copies;
2584 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2585 UI != UE; ++UI) {
2586 if (UI->getOpcode() != ISD::CopyToReg)
2587 return false;
2588 Copies.insert(*UI);
2589 }
2590 if (Copies.size() > 2)
2591 return false;
2592
2593 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2594 UI != UE; ++UI) {
2595 SDValue UseChain = UI->getOperand(0);
2596 if (Copies.count(UseChain.getNode()))
2597 // Second CopyToReg
2598 Copy = *UI;
2599 else {
2600 // We are at the top of this chain.
2601 // If the copy has a glue operand, we conservatively assume it
2602 // isn't safe to perform a tail call.
2603 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2604 return false;
2605 // First CopyToReg
2606 TCChain = UseChain;
2607 }
2608 }
2609 } else if (Copy->getOpcode() == ISD::BITCAST) {
2610 // f32 returned in a single GPR.
2611 if (!Copy->hasOneUse())
2612 return false;
2613 Copy = *Copy->use_begin();
2614 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2615 return false;
2616 // If the copy has a glue operand, we conservatively assume it isn't safe to
2617 // perform a tail call.
2618 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2619 return false;
2620 TCChain = Copy->getOperand(0);
2621 } else {
2622 return false;
2623 }
2624
2625 bool HasRet = false;
2626 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2627 UI != UE; ++UI) {
2628 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2629 UI->getOpcode() != ARMISD::INTRET_FLAG)
2630 return false;
2631 HasRet = true;
2632 }
2633
2634 if (!HasRet)
2635 return false;
2636
2637 Chain = TCChain;
2638 return true;
2639}
2640
2641bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2642 if (!Subtarget->supportsTailCall())
2643 return false;
2644
2645 auto Attr =
2646 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2647 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2648 return false;
2649
2650 return true;
2651}
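// Illustrative example (sketch, not part of the upstream source): an IR call
//   %r = tail call i32 @callee(i32 %x)
// inside a function whose attribute list contains "disable-tail-calls"="true"
// fails the check above and is therefore lowered as an ordinary call.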
2652
2653// Writing a 64-bit value requires splitting it into two 32-bit values first,
2654// then passing the low and high parts through separately.
2655static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2656 SDLoc DL(Op);
2657 SDValue WriteValue = Op->getOperand(2);
2658
2659 // This function is only supposed to be called for i64 type argument.
2660  assert(WriteValue.getValueType() == MVT::i64
2661         && "LowerWRITE_REGISTER called for non-i64 type argument.");
2662
2663 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2664 DAG.getConstant(0, DL, MVT::i32));
2665 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2666 DAG.getConstant(1, DL, MVT::i32));
2667 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2668 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2669}
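// Illustrative example (sketch, not part of the upstream source): a write such
// as
//   call void @llvm.write_register.i64(metadata !"reg", i64 %v)
// cannot target a single 32-bit GPR, so the i64 is split above into
// Lo = %v[31:0] and Hi = %v[63:32] with ISD::EXTRACT_ELEMENT, and the rebuilt
// WRITE_REGISTER node carries the two halves as separate operands.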
2670
2671// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2672// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2673// one of the above-mentioned nodes. It has to be wrapped because otherwise
2674// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2675// be used to form addressing modes. These wrapped nodes will be selected
2676// into MOVi.
2677SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2678 SelectionDAG &DAG) const {
2679 EVT PtrVT = Op.getValueType();
2680 // FIXME there is no actual debug info here
2681 SDLoc dl(Op);
2682 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2683 SDValue Res;
2684
2685  // When generating execute-only code, Constant Pools must be promoted to the
2686  // global data section. It's a bit ugly that we can't share them across basic
2687  // blocks, but this way we guarantee that execute-only behaves correctly with
2688  // position-independent addressing modes.
2689 if (Subtarget->genExecuteOnly()) {
2690 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2691 auto T = const_cast<Type*>(CP->getType());
2692 auto C = const_cast<Constant*>(CP->getConstVal());
2693 auto M = const_cast<Module*>(DAG.getMachineFunction().
2694 getFunction().getParent());
2695 auto GV = new GlobalVariable(
2696 *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2697 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2698 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2699 Twine(AFI->createPICLabelUId())
2700 );
2701 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2702 dl, PtrVT);
2703 return LowerGlobalAddress(GA, DAG);
2704 }
2705
2706 if (CP->isMachineConstantPoolEntry())
2707 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2708 CP->getAlignment());
2709 else
2710 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2711 CP->getAlignment());
2712 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2713}
2714
2715unsigned ARMTargetLowering::getJumpTableEncoding() const {
2716 return MachineJumpTableInfo::EK_Inline;
2717}
2718
2719SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2720 SelectionDAG &DAG) const {
2721 MachineFunction &MF = DAG.getMachineFunction();
2722 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2723 unsigned ARMPCLabelIndex = 0;
2724 SDLoc DL(Op);
2725 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2726 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2727 SDValue CPAddr;
2728 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2729 if (!IsPositionIndependent) {
2730 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2731 } else {
2732 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2733 ARMPCLabelIndex = AFI->createPICLabelUId();
2734 ARMConstantPoolValue *CPV =
2735 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2736 ARMCP::CPBlockAddress, PCAdj);
2737 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2738 }
2739 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2740 SDValue Result = DAG.getLoad(
2741 PtrVT, DL, DAG.getEntryNode(), CPAddr,
2742 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2743 if (!IsPositionIndependent)
2744 return Result;
2745 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2746 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2747}
2748
2749/// \brief Convert a TLS address reference into the correct sequence of loads
2750/// and calls to compute the variable's address for Darwin, and return an
2751/// SDValue containing the final node.
2752
2753/// Darwin only has one TLS scheme which must be capable of dealing with the
2754/// fully general situation, in the worst case. This means:
2755/// + "extern __thread" declaration.
2756/// + Defined in a possibly unknown dynamic library.
2757///
2758/// The general system is that each __thread variable has a [3 x i32] descriptor
2759/// which contains information used by the runtime to calculate the address. The
2760/// only part of this the compiler needs to know about is the first word, which
2761/// contains a function pointer that must be called with the address of the
2762/// entire descriptor in "r0".
2763///
2764/// Since this descriptor may be in a different unit, in general access must
2765/// proceed along the usual ARM rules. A common sequence to produce is:
2766///
2767/// movw rT1, :lower16:_var$non_lazy_ptr
2768/// movt rT1, :upper16:_var$non_lazy_ptr
2769/// ldr r0, [rT1]
2770/// ldr rT2, [r0]
2771/// blx rT2
2772/// [...address now in r0...]
2773SDValue
2774ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2775 SelectionDAG &DAG) const {
2776  assert(Subtarget->isTargetDarwin() &&
2777         "This function expects a Darwin target");
2778 SDLoc DL(Op);
2779
2780  // The first step is to get the address of the actual global symbol. This is
2781  // where the TLS descriptor lives.
2782 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2783
2784 // The first entry in the descriptor is a function pointer that we must call
2785 // to obtain the address of the variable.
2786 SDValue Chain = DAG.getEntryNode();
2787 SDValue FuncTLVGet = DAG.getLoad(
2788 MVT::i32, DL, Chain, DescAddr,
2789 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2790 /* Alignment = */ 4,
2791 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2792 MachineMemOperand::MOInvariant);
2793 Chain = FuncTLVGet.getValue(1);
2794
2795 MachineFunction &F = DAG.getMachineFunction();
2796 MachineFrameInfo &MFI = F.getFrameInfo();
2797 MFI.setAdjustsStack(true);
2798
2799 // TLS calls preserve all registers except those that absolutely must be
2800 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2801 // silly).
2802 auto TRI =
2803 getTargetMachine().getSubtargetImpl(F.getFunction())->getRegisterInfo();
2804 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2805 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2806
2807  // Finally, we can make the call. This is just a degenerate version of a
2808  // normal ARM call node: r0 takes the address of the descriptor, and the
2809  // call returns the address of the variable in this thread.
2810 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2811 Chain =
2812 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2813 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2814 DAG.getRegisterMask(Mask), Chain.getValue(1));
2815 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2816}
2817
2818SDValue
2819ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2820 SelectionDAG &DAG) const {
2821  assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2822
2823 SDValue Chain = DAG.getEntryNode();
2824 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2825 SDLoc DL(Op);
2826
2827 // Load the current TEB (thread environment block)
2828 SDValue Ops[] = {Chain,
2829 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2830 DAG.getConstant(15, DL, MVT::i32),
2831 DAG.getConstant(0, DL, MVT::i32),
2832 DAG.getConstant(13, DL, MVT::i32),
2833 DAG.getConstant(0, DL, MVT::i32),
2834 DAG.getConstant(2, DL, MVT::i32)};
2835 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2836 DAG.getVTList(MVT::i32, MVT::Other), Ops);
2837
2838 SDValue TEB = CurrentTEB.getValue(0);
2839 Chain = CurrentTEB.getValue(1);
2840
2841 // Load the ThreadLocalStoragePointer from the TEB
2842 // A pointer to the TLS array is located at offset 0x2c from the TEB.
2843 SDValue TLSArray =
2844 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2845 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2846
2847 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2848 // offset into the TLSArray.
2849
2850 // Load the TLS index from the C runtime
2851 SDValue TLSIndex =
2852 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2853 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2854 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2855
2856 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2857 DAG.getConstant(2, DL, MVT::i32));
2858 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2859 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2860 MachinePointerInfo());
2861
2862 // Get the offset of the start of the .tls section (section base)
2863 const auto *GA = cast<GlobalAddressSDNode>(Op);
2864 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2865 SDValue Offset = DAG.getLoad(
2866 PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2867 DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2868 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2869
2870 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2871}
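// Illustrative summary (sketch, not part of the upstream source) of the
// Windows TLS address computation performed above, in C-like pseudocode:
//   TEB      = CP15 c13 thread register (read via the arm_mrc intrinsic);
//   TLSArray = *(uint32_t *)(TEB + 0x2c);
//   TLS      = *(uint32_t *)(TLSArray + _tls_index * 4);
//   addr     = TLS + SECREL32(var);   // section-relative offset from the CP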
2872
2873// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2874SDValue
2875ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2876 SelectionDAG &DAG) const {
2877 SDLoc dl(GA);
2878 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2879 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2880 MachineFunction &MF = DAG.getMachineFunction();
2881 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2882 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2883 ARMConstantPoolValue *CPV =
2884 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2885 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2886 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2887 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2888 Argument = DAG.getLoad(
2889 PtrVT, dl, DAG.getEntryNode(), Argument,
2890 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2891 SDValue Chain = Argument.getValue(1);
2892
2893 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2894 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2895
2896 // call __tls_get_addr.
2897 ArgListTy Args;
2898 ArgListEntry Entry;
2899 Entry.Node = Argument;
2900 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2901 Args.push_back(Entry);
2902
2903 // FIXME: is there useful debug info available here?
2904 TargetLowering::CallLoweringInfo CLI(DAG);
2905 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2906 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2907 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2908
2909 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2910 return CallResult.first;
2911}
2912
2913// Lower ISD::GlobalTLSAddress using the "initial exec" or
2914// "local exec" model.
2915SDValue
2916ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2917 SelectionDAG &DAG,
2918 TLSModel::Model model) const {
2919 const GlobalValue *GV = GA->getGlobal();
2920 SDLoc dl(GA);
2921 SDValue Offset;
2922 SDValue Chain = DAG.getEntryNode();
2923 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2924 // Get the Thread Pointer
2925 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2926
2927 if (model == TLSModel::InitialExec) {
2928 MachineFunction &MF = DAG.getMachineFunction();
2929 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2930 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2931 // Initial exec model.
2932 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2933 ARMConstantPoolValue *CPV =
2934 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2935 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2936 true);
2937 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2938 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2939 Offset = DAG.getLoad(
2940 PtrVT, dl, Chain, Offset,
2941 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2942 Chain = Offset.getValue(1);
2943
2944 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2945 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2946
2947 Offset = DAG.getLoad(
2948 PtrVT, dl, Chain, Offset,
2949 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2950 } else {
2951 // local exec model
2952    assert(model == TLSModel::LocalExec);
2953 ARMConstantPoolValue *CPV =
2954 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2955 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2956 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2957 Offset = DAG.getLoad(
2958 PtrVT, dl, Chain, Offset,
2959 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2960 }
2961
2962 // The address of the thread local variable is the add of the thread
2963 // pointer with the offset of the variable.
2964 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2965}
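// Illustrative summary (sketch, not part of the upstream source): both exec
// models reduce to "address = thread pointer + offset". Initial-exec loads the
// offset indirectly through a GOTTPOFF constant-pool entry (PIC add plus a
// second load), while local-exec folds the TPOFF offset directly into the
// constant pool, so a single load suffices.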
2966
2967SDValue
2968ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2969 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2970 if (DAG.getTarget().Options.EmulatedTLS)
2971 return LowerToTLSEmulatedModel(GA, DAG);
2972
2973 if (Subtarget->isTargetDarwin())
2974 return LowerGlobalTLSAddressDarwin(Op, DAG);
2975
2976 if (Subtarget->isTargetWindows())
2977 return LowerGlobalTLSAddressWindows(Op, DAG);
2978
2979 // TODO: implement the "local dynamic" model
2980  assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2981 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2982
2983 switch (model) {
2984 case TLSModel::GeneralDynamic:
2985 case TLSModel::LocalDynamic:
2986 return LowerToTLSGeneralDynamicModel(GA, DAG);
2987 case TLSModel::InitialExec:
2988 case TLSModel::LocalExec:
2989 return LowerToTLSExecModels(GA, DAG, model);
2990 }
2991  llvm_unreachable("bogus TLS model");
2992}
2993
2994/// Return true if all users of V are within function F, looking through
2995/// ConstantExprs.
2996static bool allUsersAreInFunction(const Value *V, const Function *F) {
2997 SmallVector<const User*,4> Worklist;
2998 for (auto *U : V->users())
2999 Worklist.push_back(U);
3000 while (!Worklist.empty()) {
3001 auto *U = Worklist.pop_back_val();
3002 if (isa<ConstantExpr>(U)) {
3003 for (auto *UU : U->users())
3004 Worklist.push_back(UU);
3005 continue;
3006 }
3007
3008 auto *I = dyn_cast<Instruction>(U);
3009 if (!I || I->getParent()->getParent() != F)
3010 return false;
3011 }
3012 return true;
3013}
3014
3015/// Return true if all users of V are within some (any) function, looking through
3016/// ConstantExprs. In other words, return false if V has any global constant users.
3017static bool allUsersAreInFunctions(const Value *V) {
3018 SmallVector<const User*,4> Worklist;
3019 for (auto *U : V->users())
3020 Worklist.push_back(U);
3021 while (!Worklist.empty()) {
3022 auto *U = Worklist.pop_back_val();
3023 if (isa<ConstantExpr>(U)) {
3024 for (auto *UU : U->users())
3025 Worklist.push_back(UU);
3026 continue;
3027 }
3028
3029 if (!isa<Instruction>(U))
3030 return false;
3031 }
3032 return true;
3033}
3034
3035// Return true if T is an integer, float or an array/vector of either.
3036static bool isSimpleType(Type *T) {
3037 if (T->isIntegerTy() || T->isFloatingPointTy())
3038 return true;
3039 Type *SubT = nullptr;
3040 if (T->isArrayTy())
3041 SubT = T->getArrayElementType();
3042 else if (T->isVectorTy())
3043 SubT = T->getVectorElementType();
3044 else
3045 return false;
3046 return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3047}
3048
3049static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3050 EVT PtrVT, const SDLoc &dl) {
3051 // If we're creating a pool entry for a constant global with unnamed address,
3052 // and the global is small enough, we can emit it inline into the constant pool
3053 // to save ourselves an indirection.
3054 //
3055 // This is a win if the constant is only used in one function (so it doesn't
3056 // need to be duplicated) or duplicating the constant wouldn't increase code
3057 // size (implying the constant is no larger than 4 bytes).
3058 const Function &F = DAG.getMachineFunction().getFunction();
3059
3060  // We rely on this decision to inline being idempotent and unrelated to the
3061  // use-site. We know that if we inline a variable at one use site, we'll
3062  // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3063  // doesn't know about this optimization, so bail out if it's enabled;
3064  // otherwise we could decide to inline here (and thus never emit the GV)
3065  // while fast-isel generated code still requires the GV.
3066 if (!EnableConstpoolPromotion ||
3067 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3068 return SDValue();
3069
3070 auto *GVar = dyn_cast<GlobalVariable>(GV);
3071 if (!GVar || !GVar->hasInitializer() ||
3072 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3073 !GVar->hasLocalLinkage())
3074 return SDValue();
3075
3076 // Ensure that we don't try and inline any type that contains pointers. If
3077 // we inline a value that contains relocations, we move the relocations from
3078 // .data to .text which is not ideal.
3079 auto *Init = GVar->getInitializer();
3080 if (!isSimpleType(Init->getType()))
3081 return SDValue();
3082
3083 // The constant islands pass can only really deal with alignment requests
3084 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3085 // any type wanting greater alignment requirements than 4 bytes. We also
3086 // can only promote constants that are multiples of 4 bytes in size or
3087 // are paddable to a multiple of 4. Currently we only try and pad constants
3088 // that are strings for simplicity.
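  // Illustrative example (sketch, not part of the upstream source): a 6-byte
  // constant string has Size == 6, so RequiredPadding == 4 - (6 % 4) == 2 and
  // PaddedSize == 8 below; two zero bytes are appended before the initializer
  // is rebuilt. A Size that is already a multiple of 4 yields
  // RequiredPadding == 4, which is treated as "no padding needed".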
3089 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3090 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3091 unsigned Align = GVar->getAlignment();
3092 unsigned RequiredPadding = 4 - (Size % 4);
3093 bool PaddingPossible =
3094 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3095 if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3096 Size == 0)
3097 return SDValue();
3098
3099 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3100 MachineFunction &MF = DAG.getMachineFunction();
3101 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3102
3103 // We can't bloat the constant pool too much, else the ConstantIslands pass
3104 // may fail to converge. If we haven't promoted this global yet (it may have
3105 // multiple uses), and promoting it would increase the constant pool size (Sz
3106 // > 4), ensure we have space to do so up to MaxTotal.
3107 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3108 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3109 ConstpoolPromotionMaxTotal)
3110 return SDValue();
3111
3112  // This is only valid if all users are in a single function OR it has users
3113  // in multiple functions but is no larger than a pointer. We also check if
3114  // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3115  // address taken.
3116 if (!allUsersAreInFunction(GVar, &F) &&
3117 !(Size <= 4 && allUsersAreInFunctions(GVar)))
3118 return SDValue();
3119
3120 // We're going to inline this global. Pad it out if needed.
3121 if (RequiredPadding != 4) {
3122 StringRef S = CDAInit->getAsString();
3123
3124 SmallVector<uint8_t,16> V(S.size());
3125 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3126 while (RequiredPadding--)
3127 V.push_back(0);
3128 Init = ConstantDataArray::get(*DAG.getContext(), V);
3129 }
3130
3131 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3132 SDValue CPAddr =
3133 DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3134 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3135 AFI->markGlobalAsPromotedToConstantPool(GVar);
3136 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3137 PaddedSize - 4);
3138 }
3139 ++NumConstpoolPromoted;
3140 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3141}
3142
3143bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3144 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3145 GV = GA->getBaseObject();
3146 return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3147 isa<Function>(GV);
3148}
3149
3150SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3151 SelectionDAG &DAG) const {
3152 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3153  default: llvm_unreachable("unknown object format");
3154 case Triple::COFF:
3155 return LowerGlobalAddressWindows(Op, DAG);
3156 case Triple::ELF:
3157 return LowerGlobalAddressELF(Op, DAG);
3158 case Triple::MachO:
3159 return LowerGlobalAddressDarwin(Op, DAG);
3160 }
3161}
3162
3163SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3164 SelectionDAG &DAG) const {
3165 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3166 SDLoc dl(Op);
3167 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3168 const TargetMachine &TM = getTargetMachine();
3169 bool IsRO = isReadOnly(GV);
3170
3171 // promoteToConstantPool only if not generating XO text section
3172 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3173 if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3174 return V;
3175
3176 if (isPositionIndependent()) {
3177 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3178 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3179 UseGOT_PREL ? ARMII::MO_GOT : 0);
3180 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3181 if (UseGOT_PREL)
3182 Result =
3183 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3184 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3185 return Result;
3186 } else if (Subtarget->isROPI() && IsRO) {
3187 // PC-relative.
3188 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3189 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3190 return Result;
3191 } else if (Subtarget->isRWPI() && !IsRO) {
3192 // SB-relative.
3193 SDValue RelAddr;
3194 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3195 ++NumMovwMovt;
3196 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3197 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3198 } else { // use literal pool for address constant
3199 ARMConstantPoolValue *CPV =
3200 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3201 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3202 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3203 RelAddr = DAG.getLoad(
3204 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3205 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3206 }
3207 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3208 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3209 return Result;
3210 }
3211
3212 // If we have T2 ops, we can materialize the address directly via movt/movw
3213 // pair. This is always cheaper.
3214 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3215 ++NumMovwMovt;
3216 // FIXME: Once remat is capable of dealing with instructions with register
3217 // operands, expand this into two nodes.
3218 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3219 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3220 } else {
3221 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3222 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3223 return DAG.getLoad(
3224 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3225 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3226 }
3227}
3228
3229SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3230 SelectionDAG &DAG) const {
3231  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3232         "ROPI/RWPI not currently supported for Darwin");
3233 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3234 SDLoc dl(Op);
3235 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3236
3237 if (Subtarget->useMovt(DAG.getMachineFunction()))
3238 ++NumMovwMovt;
3239
3240 // FIXME: Once remat is capable of dealing with instructions with register
3241 // operands, expand this into multiple nodes
3242 unsigned Wrapper =
3243 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3244
3245 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3246 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3247
3248 if (Subtarget->isGVIndirectSymbol(GV))
3249 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3250 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3251 return Result;
3252}
3253
3254SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3255 SelectionDAG &DAG) const {
3256  assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3257  assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3258         "Windows on ARM expects to use movw/movt");
3259  assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3260         "ROPI/RWPI not currently supported for Windows");
3261
3262 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3263 const ARMII::TOF TargetFlags =
3264 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3265 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3266 SDValue Result;
3267 SDLoc DL(Op);
3268
3269 ++NumMovwMovt;
3270
3271 // FIXME: Once remat is capable of dealing with instructions with register
3272 // operands, expand this into two nodes.
3273 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3274 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3275 TargetFlags));
3276 if (GV->hasDLLImportStorageClass())
3277 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3278 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3279 return Result;
3280}
3281
3282SDValue
3283ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3284 SDLoc dl(Op);
3285 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3286 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3287 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3288 Op.getOperand(1), Val);
3289}
3290
3291SDValue
3292ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3293 SDLoc dl(Op);
3294 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3295 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3296}
3297
3298SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3299 SelectionDAG &DAG) const {
3300 SDLoc dl(Op);
3301 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3302 Op.getOperand(0));
3303}
3304
3305SDValue
3306ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3307 const ARMSubtarget *Subtarget) const {
3308 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3309 SDLoc dl(Op);
3310 switch (IntNo) {
3311 default: return SDValue(); // Don't custom lower most intrinsics.
3312 case Intrinsic::thread_pointer: {
3313 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3314 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3315 }
3316 case Intrinsic::eh_sjlj_lsda: {
3317 MachineFunction &MF = DAG.getMachineFunction();
3318 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3319 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3320 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3321 SDValue CPAddr;
3322 bool IsPositionIndependent = isPositionIndependent();
3323 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3324 ARMConstantPoolValue *CPV =
3325 ARMConstantPoolConstant::Create(&MF.getFunction(), ARMPCLabelIndex,
3326 ARMCP::CPLSDA, PCAdj);
3327 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3328 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3329 SDValue Result = DAG.getLoad(
3330 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3331 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3332
3333 if (IsPositionIndependent) {
3334 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3335 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3336 }
3337 return Result;
3338 }
3339 case Intrinsic::arm_neon_vabs:
3340 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3341 Op.getOperand(1));
3342 case Intrinsic::arm_neon_vmulls:
3343 case Intrinsic::arm_neon_vmullu: {
3344 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3345 ? ARMISD::VMULLs : ARMISD::VMULLu;
3346 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3347 Op.getOperand(1), Op.getOperand(2));
3348 }
3349 case Intrinsic::arm_neon_vminnm:
3350 case Intrinsic::arm_neon_vmaxnm: {
3351 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3352 ? ISD::FMINNUM : ISD::FMAXNUM;
3353 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3354 Op.getOperand(1), Op.getOperand(2));
3355 }
3356 case Intrinsic::arm_neon_vminu:
3357 case Intrinsic::arm_neon_vmaxu: {
3358 if (Op.getValueType().isFloatingPoint())
3359 return SDValue();
3360 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3361 ? ISD::UMIN : ISD::UMAX;
3362 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3363 Op.getOperand(1), Op.getOperand(2));
3364 }
3365 case Intrinsic::arm_neon_vmins:
3366 case Intrinsic::arm_neon_vmaxs: {
3367 // v{min,max}s is overloaded between signed integers and floats.
3368 if (!Op.getValueType().isFloatingPoint()) {
3369 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3370 ? ISD::SMIN : ISD::SMAX;
3371 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3372 Op.getOperand(1), Op.getOperand(2));
3373 }
3374 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3375 ? ISD::FMINNAN : ISD::FMAXNAN;
3376 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3377 Op.getOperand(1), Op.getOperand(2));
3378 }
3379 case Intrinsic::arm_neon_vtbl1:
3380 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3381 Op.getOperand(1), Op.getOperand(2));
3382 case Intrinsic::arm_neon_vtbl2:
3383 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3384 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3385 }
3386}
3387
3388static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3389 const ARMSubtarget *Subtarget) {
3390 SDLoc dl(Op);
3391 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3392 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3393 if (SSID == SyncScope::SingleThread)
3394 return Op;
3395
3396 if (!Subtarget->hasDataBarrier()) {
3397 // Some ARMv6 cpus can support data barriers with an mcr instruction.
3398 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3399 // here.
3400    assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3401           "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3402 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3403 DAG.getConstant(0, dl, MVT::i32));
3404 }
3405
3406 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3407 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3408 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3409 if (Subtarget->isMClass()) {
3410 // Only a full system barrier exists in the M-class architectures.
3411 Domain = ARM_MB::SY;
3412 } else if (Subtarget->preferISHSTBarriers() &&
3413 Ord == AtomicOrdering::Release) {
3414 // Swift happens to implement ISHST barriers in a way that's compatible with
3415 // Release semantics but weaker than ISH so we'd be fools not to use
3416 // it. Beware: other processors probably don't!
3417 Domain = ARM_MB::ISHST;
3418 }
3419
3420 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3421 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3422 DAG.getConstant(Domain, dl, MVT::i32));
3423}
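// Illustrative example (sketch, not part of the upstream source): an IR
// "fence release" lowers to "dmb ishst" on cores that prefer ISHST barriers,
// to "dmb ish" on other A/R-class targets, and to "dmb sy" on M-class parts,
// while a fence restricted to syncscope("singlethread") is dropped entirely.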
3424
3425static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3426 const ARMSubtarget *Subtarget) {
3427  // ARM prior to v5TE and Thumb1 do not have preload instructions.
3428 if (!(Subtarget->isThumb2() ||
3429 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3430 // Just preserve the chain.
3431 return Op.getOperand(0);
3432
3433 SDLoc dl(Op);
3434 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3435 if (!isRead &&
3436 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3437 // ARMv7 with MP extension has PLDW.
3438 return Op.getOperand(0);
3439
3440 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3441 if (Subtarget->isThumb()) {
3442 // Invert the bits.
3443 isRead = ~isRead & 1;
3444 isData = ~isData & 1;
3445 }
3446
3447 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3448 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3449 DAG.getConstant(isData, dl, MVT::i32));
3450}
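// Illustrative example (sketch, not part of the upstream source): for
// @llvm.prefetch(ptr, /*rw=*/0, locality, /*cache type=*/1), operand 2 is 0,
// so isRead == (~0 & 1) == 1 and isData == 1: a plain data-read preload (PLD).
// A write prefetch (rw == 1) gives isRead == 0 and is dropped above unless the
// target has the ARMv7 MP extension, which provides PLDW.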
3451
3452static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3453 MachineFunction &MF = DAG.getMachineFunction();
3454 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3455
3456 // vastart just stores the address of the VarArgsFrameIndex slot into the
3457 // memory location argument.
3458 SDLoc dl(Op);
3459 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3460 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3461 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3462 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3463 MachinePointerInfo(SV));
3464}
3465
3466SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3467 CCValAssign &NextVA,
3468 SDValue &Root,
3469 SelectionDAG &DAG,
3470 const SDLoc &dl) const {
3471 MachineFunction &MF = DAG.getMachineFunction();
3472 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3473
3474 const TargetRegisterClass *RC;
3475 if (AFI->isThumb1OnlyFunction())
3476 RC = &ARM::tGPRRegClass;
3477 else
3478 RC = &ARM::GPRRegClass;
3479
3480 // Transform the arguments stored in physical registers into virtual ones.
3481 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3482 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3483
3484 SDValue ArgValue2;
3485 if (NextVA.isMemLoc()) {
3486 MachineFrameInfo &MFI = MF.getFrameInfo();
3487 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3488
3489 // Create load node to retrieve arguments from the stack.
3490 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3491 ArgValue2 = DAG.getLoad(
3492 MVT::i32, dl, Root, FIN,
3493 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3494 } else {
3495 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3496 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3497 }
3498 if (!Subtarget->isLittle())
3499 std::swap (ArgValue, ArgValue2);
3500 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3501}
3502
3503// The remaining GPRs hold either the beginning of variable-argument
3504// data, or the beginning of an aggregate passed by value (usually
3505// byval). Either way, we allocate stack slots adjacent to the data
3506// provided by our caller, and store the unallocated registers there.
3507// If this is a variadic function, the va_list pointer will begin with
3508// these values; otherwise, this reassembles a (byval) structure that
3509// was split between registers and memory.
3510// Return: The frame index that the registers were stored into.
3511int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3512 const SDLoc &dl, SDValue &Chain,
3513 const Value *OrigArg,
3514 unsigned InRegsParamRecordIdx,
3515 int ArgOffset, unsigned ArgSize) const {
3516  // Currently, two use-cases are possible:
3517  // Case #1. Non-var-args function, and we meet the first byval parameter.
3518  //          Set up the first unallocated register as the first byval register;
3519  //          eat all remaining registers
3520  //          (these two actions are performed by the HandleByVal method).
3521  //          Then, here, we initialize the stack frame with
3522  //          "store-reg" instructions.
3523  // Case #2. Var-args function that doesn't contain byval parameters.
3524  //          The same: eat all remaining unallocated registers,
3525  //          initialize the stack frame.
3526
3527 MachineFunction &MF = DAG.getMachineFunction();
3528 MachineFrameInfo &MFI = MF.getFrameInfo();
3529 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3530 unsigned RBegin, REnd;
3531 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3532 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3533 } else {
3534 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3535 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3536 REnd = ARM::R4;
3537 }
3538
3539 if (REnd != RBegin)
3540 ArgOffset = -4 * (ARM::R4 - RBegin);
3541
3542 auto PtrVT = getPointerTy(DAG.getDataLayout());
3543 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3544 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3545
3546 SmallVector<SDValue, 4> MemOps;
3547 const TargetRegisterClass *RC =
3548 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3549
3550 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3551 unsigned VReg = MF.addLiveIn(Reg, RC);
3552 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3553 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3554 MachinePointerInfo(OrigArg, 4 * i));
3555 MemOps.push_back(Store);
3556 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3557 }
3558
3559 if (!MemOps.empty())
3560 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3561 return FrameIndex;
3562}
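// Illustrative example (sketch, not part of the upstream source): for a byval
// aggregate whose first in-register piece was assigned to r2, the code above
// sees RBegin == ARM::R2 and REnd == ARM::R4, so ArgOffset == -4 * (R4 - R2)
// == -8; r2 and r3 are spilled to the two words immediately below the stack
// arguments provided by the caller, and the returned frame index points at
// that 8-byte block.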
3563
3564// Set up the stack frame that the va_list pointer will start from.
3565void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3566 const SDLoc &dl, SDValue &Chain,
3567 unsigned ArgOffset,
3568 unsigned TotalArgRegsSaveSize,
3569 bool ForceMutable) const {
3570 MachineFunction &MF = DAG.getMachineFunction();
3571 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3572
3573  // Try to store any remaining integer argument regs
3574  // to their spots on the stack so that they may be loaded by dereferencing
3575  // the result of va_next.
3576  // If there are no regs to be stored, just point the address after the last
3577  // argument passed via the stack.
3578 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3579 CCInfo.getInRegsParamsCount(),
3580 CCInfo.getNextStackOffset(), 4);
3581 AFI->setVarArgsFrameIndex(FrameIndex);
3582}
3583
3584SDValue ARMTargetLowering::LowerFormalArguments(
3585 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3586 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3587 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3588 MachineFunction &MF = DAG.getMachineFunction();
3589 MachineFrameInfo &MFI = MF.getFrameInfo();
3590
3591 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3592
3593 // Assign locations to all of the incoming arguments.
3594 SmallVector<CCValAssign, 16> ArgLocs;
3595 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3596 *DAG.getContext());
3597 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3598
3599 SmallVector<SDValue, 16> ArgValues;
3600 SDValue ArgValue;
3601 Function::const_arg_iterator CurOrigArg = MF.getFunction().arg_begin();
3602 unsigned CurArgIdx = 0;
3603
3604  // Initially ArgRegsSaveSize is zero.
3605  // Then we increase this value each time we meet a byval parameter.
3606  // We also increase this value in the case of a varargs function.
3607 AFI->setArgRegsSaveSize(0);
3608
3609 // Calculate the amount of stack space that we need to allocate to store
3610 // byval and variadic arguments that are passed in registers.
3611 // We need to know this before we allocate the first byval or variadic
3612 // argument, as they will be allocated a stack slot below the CFA (Canonical
3613 // Frame Address, the stack pointer at entry to the function).
3614 unsigned ArgRegBegin = ARM::R4;
3615 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3616 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3617 break;
3618
3619 CCValAssign &VA = ArgLocs[i];
3620 unsigned Index = VA.getValNo();
3621 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3622 if (!Flags.isByVal())
3623 continue;
3624
3625    assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3626 unsigned RBegin, REnd;
3627 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3628 ArgRegBegin = std::min(ArgRegBegin, RBegin);
3629
3630 CCInfo.nextInRegsParam();
3631 }
3632 CCInfo.rewindByValRegsInfo();
3633
3634 int lastInsIndex = -1;
3635 if (isVarArg && MFI.hasVAStart()) {
3636 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3637 if (RegIdx != array_lengthof(GPRArgRegs))
3638 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3639 }
3640
3641 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3642 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3643 auto PtrVT = getPointerTy(DAG.getDataLayout());
3644
3645 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3646 CCValAssign &VA = ArgLocs[i];
3647 if (Ins[VA.getValNo()].isOrigArg()) {
3648 std::advance(CurOrigArg,
3649 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3650 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3651 }
3652 // Arguments stored in registers.
3653 if (VA.isRegLoc()) {
3654 EVT RegVT = VA.getLocVT();
3655
3656 if (VA.needsCustom()) {
3657 // f64 and vector types are split up into multiple registers or
3658 // combinations of registers and stack slots.
3659 if (VA.getLocVT() == MVT::v2f64) {
3660 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3661 Chain, DAG, dl);
3662 VA = ArgLocs[++i]; // skip ahead to next loc
3663 SDValue ArgValue2;
3664 if (VA.isMemLoc()) {
3665 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3666 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3667 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3668 MachinePointerInfo::getFixedStack(
3669 DAG.getMachineFunction(), FI));
3670 } else {
3671 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3672 Chain, DAG, dl);
3673 }
3674 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3675 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3676 ArgValue, ArgValue1,
3677 DAG.getIntPtrConstant(0, dl));
3678 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3679 ArgValue, ArgValue2,
3680 DAG.getIntPtrConstant(1, dl));
3681 } else
3682 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3683 } else {
3684 const TargetRegisterClass *RC;
3685
3686 if (RegVT == MVT::f32)
3687 RC = &ARM::SPRRegClass;
3688 else if (RegVT == MVT::f64)
3689 RC = &ARM::DPRRegClass;
3690 else if (RegVT == MVT::v2f64)
3691 RC = &ARM::QPRRegClass;
3692 else if (RegVT == MVT::i32)
3693 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3694 : &ARM::GPRRegClass;
3695 else
3696        llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3697
3698 // Transform the arguments in physical registers into virtual ones.
3699 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3700 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3701 }
3702
3703 // If this is an 8 or 16-bit value, it is really passed promoted
3704 // to 32 bits. Insert an assert[sz]ext to capture this, then
3705 // truncate to the right size.
3706 switch (VA.getLocInfo()) {
3707      default: llvm_unreachable("Unknown loc info!");
3708 case CCValAssign::Full: break;
3709 case CCValAssign::BCvt:
3710 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3711 break;
3712 case CCValAssign::SExt:
3713 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3714 DAG.getValueType(VA.getValVT()));
3715 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3716 break;
3717 case CCValAssign::ZExt:
3718 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3719 DAG.getValueType(VA.getValVT()));
3720 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3721 break;
3722 }
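(Editorial aside, not part of the annotated file: a minimal scalar C++ sketch of what the SExt case in the switch above encodes. The helper name narrowSExtArg is hypothetical and only illustrates the idea.)

// Illustrative only: an i8 argument arrives sign-extended in a full 32-bit
// register; AssertSext records that fact and the TRUNCATE recovers the i8.
inline signed char narrowSExtArg(int reg) {
  // Assumes reg holds a value in [-128, 127], i.e. a sign-extended i8.
  return (signed char)reg;   // the ISD::TRUNCATE step
}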
3723
3724 InVals.push_back(ArgValue);
3725 } else { // VA.isRegLoc()
3726 // sanity check
3727 assert(VA.isMemLoc());
3728 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3729
3730 int index = VA.getValNo();
3731
3732 // Some Ins[] entries become multiple ArgLoc[] entries.
3733 // Process them only once.
3734 if (index != lastInsIndex)
3735 {
3736 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3737 // FIXME: For now, all byval parameter objects are marked mutable.
3738 // This can be changed with more analysis.
3739 // In case of tail call optimization mark all arguments mutable.
3740 // Since they could be overwritten by lowering of arguments in case of
3741 // a tail call.
3742 if (Flags.isByVal()) {
3743 assert(Ins[index].isOrigArg() &&
3744 "Byval arguments cannot be implicit");
3745 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3746
3747 int FrameIndex = StoreByValRegs(
3748 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3749 VA.getLocMemOffset(), Flags.getByValSize());
3750 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3751 CCInfo.nextInRegsParam();
3752 } else {
3753 unsigned FIOffset = VA.getLocMemOffset();
3754 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3755 FIOffset, true);
3756
3757 // Create load nodes to retrieve arguments from the stack.
3758 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3759 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3760 MachinePointerInfo::getFixedStack(
3761 DAG.getMachineFunction(), FI)));
3762 }
3763 lastInsIndex = index;
3764 }
3765 }
3766 }
3767
3768 // varargs
3769 if (isVarArg && MFI.hasVAStart())
3770 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3771 CCInfo.getNextStackOffset(),
3772 TotalArgRegsSaveSize);
3773
3774 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3775
3776 return Chain;
3777}
3778
3779/// isFloatingPointZero - Return true if this is +0.0.
3780static bool isFloatingPointZero(SDValue Op) {
3781 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3782 return CFP->getValueAPF().isPosZero();
3783 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3784 // Maybe this has already been legalized into the constant pool?
3785 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3786 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3787 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3788 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3789 return CFP->getValueAPF().isPosZero();
3790 }
3791 } else if (Op->getOpcode() == ISD::BITCAST &&
3792 Op->getValueType(0) == MVT::f64) {
3793 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3794 // created by LowerConstantFP().
3795 SDValue BitcastOp = Op->getOperand(0);
3796 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3797 isNullConstant(BitcastOp->getOperand(0)))
3798 return true;
3799 }
3800 return false;
3801}
3802
3803/// Returns an appropriate ARM CMP (cmp) and the corresponding condition code for
3804/// the given operands.
3805SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3806 SDValue &ARMcc, SelectionDAG &DAG,
3807 const SDLoc &dl) const {
3808 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3809 unsigned C = RHSC->getZExtValue();
3810 if (!isLegalICmpImmediate(C)) {
3811 // Constant does not fit, try adjusting it by one?
3812 switch (CC) {
3813 default: break;
3814 case ISD::SETLT:
3815 case ISD::SETGE:
3816 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3817 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3818 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3819 }
3820 break;
3821 case ISD::SETULT:
3822 case ISD::SETUGE:
3823 if (C != 0 && isLegalICmpImmediate(C-1)) {
3824 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3825 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3826 }
3827 break;
3828 case ISD::SETLE:
3829 case ISD::SETGT:
3830 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3831 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3832 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3833 }
3834 break;
3835 case ISD::SETULE:
3836 case ISD::SETUGT:
3837 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3838 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3839 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3840 }
3841 break;
3842 }
3843 }
3844 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3845 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3846 // In ARM and Thumb-2, the compare instructions can shift their second
3847 // operand.
3848 CC = ISD::getSetCCSwappedOperands(CC);
3849 std::swap(LHS, RHS);
3850 }
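(Editorial aside, not part of the annotated file: a minimal C++ sketch of the identity the immediate adjustment above relies on. The helper below is hypothetical and shows only the signed SETLT -> SETLE case.)

// Illustrative only: x < C is equivalent to x <= C - 1 whenever C is not the
// minimum value of its type, which is exactly the wrap-around case excluded
// above (C != 0x80000000 for signed, C != 0 for unsigned comparisons).
inline bool lessThanViaLE(int x, int c) {
  // Assumes c != INT_MIN, mirroring the guard in the lowering code.
  return x <= c - 1;   // same result as x < c under that assumption
}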
3851
3852 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3853 ARMISD::NodeType CompareType;
3854 switch (CondCode) {
3855 default:
3856 CompareType = ARMISD::CMP;
3857 break;
3858 case ARMCC::EQ:
3859 case ARMCC::NE:
3860 // Uses only Z Flag
3861 CompareType = ARMISD::CMPZ;
3862 break;
3863 }
3864 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3865 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3866}
3867
3868/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3869SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3870 SelectionDAG &DAG, const SDLoc &dl,
3871 bool InvalidOnQNaN) const {
3872 assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3873 SDValue Cmp;
3874 SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3875 if (!isFloatingPointZero(RHS))
3876 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3877 else
3878 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3879 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3880}
3881
3882/// duplicateCmp - Glue values can have only one use, so this function
3883/// duplicates a comparison node.
3884SDValue
3885ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3886 unsigned Opc = Cmp.getOpcode();
3887 SDLoc DL(Cmp);
3888 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3889 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3890
3891 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3892 Cmp = Cmp.getOperand(0);
3893 Opc = Cmp.getOpcode();
3894 if (Opc == ARMISD::CMPFP)
3895 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3896 Cmp.getOperand(1), Cmp.getOperand(2));
3897 else {
3898 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3899 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3900 Cmp.getOperand(1));
3901 }
3902 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3903}
3904
3905std::pair<SDValue, SDValue>
3906ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3907 SDValue &ARMcc) const {
3908 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3909
3910 SDValue Value, OverflowCmp;
3911 SDValue LHS = Op.getOperand(0);
3912 SDValue RHS = Op.getOperand(1);
3913 SDLoc dl(Op);
3914
3915 // FIXME: We are currently always generating CMPs because we don't support
3916 // generating CMN through the backend. This is not as good as the natural
3917 // CMP case because it causes a register dependency and cannot be folded
3918 // later.
3919
3920 switch (Op.getOpcode()) {
3921 default:
3922 llvm_unreachable("Unknown overflow instruction!");
3923 case ISD::SADDO:
3924 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3925 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3926 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3927 break;
3928 case ISD::UADDO:
3929 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3930 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3931 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3932 break;
3933 case ISD::SSUBO:
3934 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3935 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3936 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3937 break;
3938 case ISD::USUBO:
3939 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3940 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3941 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3942 break;
3943 } // switch (...)
3944
3945 return std::make_pair(Value, OverflowCmp);
3946}
3947
3948SDValue
3949ARMTargetLowering::LowerSignedALUO(SDValue Op, SelectionDAG &DAG) const {
3950 // Let legalize expand this if it isn't a legal type yet.
3951 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3952 return SDValue();
3953
3954 SDValue Value, OverflowCmp;
3955 SDValue ARMcc;
3956 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3957 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3958 SDLoc dl(Op);
3959 // We use 0 and 1 as false and true values.
3960 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3961 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3962 EVT VT = Op.getValueType();
3963
3964 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3965 ARMcc, CCR, OverflowCmp);
3966
3967 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3968 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3969}
3970
3971static SDValue ConvertBooleanCarryToCarryFlag(SDValue BoolCarry,
3972 SelectionDAG &DAG) {
3973 SDLoc DL(BoolCarry);
3974 EVT CarryVT = BoolCarry.getValueType();
3975
3976 APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
3977 // This converts the boolean value carry into the carry flag by doing
3978 // ARMISD::ADDC Carry, ~0
3979 return DAG.getNode(ARMISD::ADDC, DL, DAG.getVTList(CarryVT, MVT::i32),
3980 BoolCarry, DAG.getConstant(NegOne, DL, CarryVT));
3981}
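(Editorial aside, not part of the annotated file: a minimal C++ sketch of why adding ~0 turns a 0/1 boolean into a carry-out, as the ADDC above does. The helper name boolToCarryOut is hypothetical.)

inline unsigned boolToCarryOut(unsigned b) {
  // Illustrative only: with b restricted to 0 or 1, b + 0xFFFFFFFF wraps
  // exactly when b == 1, so the carry-out of the addition reproduces the
  // original boolean value.
  unsigned sum = b + 0xFFFFFFFFu;
  return sum < b ? 1u : 0u;   // carry-out of the 32-bit addition
}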
3982
3983static SDValue ConvertCarryFlagToBooleanCarry(SDValue Flags, EVT VT,
3984 SelectionDAG &DAG) {
3985 SDLoc DL(Flags);
3986
3987 // Now convert the carry flag into a boolean carry. We do this
3988 // using ARMISD:ADDE 0, 0, Carry
3989 return DAG.getNode(ARMISD::ADDE, DL, DAG.getVTList(VT, MVT::i32),
3990 DAG.getConstant(0, DL, MVT::i32),
3991 DAG.getConstant(0, DL, MVT::i32), Flags);
3992}
3993
3994SDValue ARMTargetLowering::LowerUnsignedALUO(SDValue Op,
3995 SelectionDAG &DAG) const {
3996 // Let legalize expand this if it isn't a legal type yet.
3997 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3998 return SDValue();
3999
4000 SDValue LHS = Op.getOperand(0);
4001 SDValue RHS = Op.getOperand(1);
4002 SDLoc dl(Op);
4003
4004 EVT VT = Op.getValueType();
4005 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
4006 SDValue Value;
4007 SDValue Overflow;
4008 switch (Op.getOpcode()) {
4009 default:
4010 llvm_unreachable("Unknown overflow instruction!");
4011 case ISD::UADDO:
4012 Value = DAG.getNode(ARMISD::ADDC, dl, VTs, LHS, RHS);
4013 // Convert the carry flag into a boolean value.
4014 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4015 break;
4016 case ISD::USUBO: {
4017 Value = DAG.getNode(ARMISD::SUBC, dl, VTs, LHS, RHS);
4018 // Convert the carry flag into a boolean value.
4019 Overflow = ConvertCarryFlagToBooleanCarry(Value.getValue(1), VT, DAG);
4020 // ARMISD::SUBC returns 0 when we have to borrow, so make it an overflow
4021 // value. So compute 1 - C.
4022 Overflow = DAG.getNode(ISD::SUB, dl, MVT::i32,
4023 DAG.getConstant(1, dl, MVT::i32), Overflow);
4024 break;
4025 }
4026 }
4027
4028 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
4029}
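(Editorial aside, not part of the annotated file: a minimal C++ sketch of the "1 - C" step in the USUBO case above. The helper name usuboOverflow is hypothetical.)

inline unsigned usuboOverflow(unsigned lhs, unsigned rhs) {
  // Illustrative only: ARM subtraction sets the carry flag when *no* borrow
  // occurs, so the USUBO overflow bit is 1 - carry, as computed above.
  unsigned carry = (lhs >= rhs) ? 1u : 0u;   // SUBS carry-out
  return 1u - carry;                         // 1 exactly when lhs < rhs
}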
4030
4031SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
4032 SDValue Cond = Op.getOperand(0);
4033 SDValue SelectTrue = Op.getOperand(1);
4034 SDValue SelectFalse = Op.getOperand(2);
4035 SDLoc dl(Op);
4036 unsigned Opc = Cond.getOpcode();
4037
4038 if (Cond.getResNo() == 1 &&
4039 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
4040 Opc == ISD::USUBO)) {
4041 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
4042 return SDValue();
4043
4044 SDValue Value, OverflowCmp;
4045 SDValue ARMcc;
4046 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
4047 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4048 EVT VT = Op.getValueType();
4049
4050 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
4051 OverflowCmp, DAG);
4052 }
4053
4054 // Convert:
4055 //
4056 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
4057 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
4058 //
4059 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
4060 const ConstantSDNode *CMOVTrue =
4061 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
4062 const ConstantSDNode *CMOVFalse =
4063 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
4064
4065 if (CMOVTrue && CMOVFalse) {
4066 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
4067 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
4068
4069 SDValue True;
4070 SDValue False;
4071 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4072 True = SelectTrue;
4073 False = SelectFalse;
4074 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4075 True = SelectFalse;
4076 False = SelectTrue;
4077 }
4078
4079 if (True.getNode() && False.getNode()) {
4080 EVT VT = Op.getValueType();
4081 SDValue ARMcc = Cond.getOperand(2);
4082 SDValue CCR = Cond.getOperand(3);
4083 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4084 assert(True.getValueType() == VT);
4085 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4086 }
4087 }
4088 }
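(Editorial aside, not part of the annotated file: a minimal C++ sketch of the boolean identity behind the select-of-CMOV fold above. The helper name selectOfBooleanCmov is hypothetical.)

inline int selectOfBooleanCmov(bool cond, int t, int f) {
  // Illustrative only: the inner CMOV materializes cond as 1 or 0, so
  // selecting on that value is the same as selecting on cond directly
  // (and with the 1/0 arms swapped, t and f swap as well).
  int asCmov = cond ? 1 : 0;
  return asCmov ? t : f;      // == cond ? t : f
}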
4089
4090 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4091 // undefined bits before doing a full-word comparison with zero.
4092 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4093 DAG.getConstant(1, dl, Cond.getValueType()));
4094
4095 return DAG.getSelectCC(dl, Cond,
4096 DAG.getConstant(0, dl, Cond.getValueType()),
4097 SelectTrue, SelectFalse, ISD::SETNE);
4098}
4099
4100static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4101 bool &swpCmpOps, bool &swpVselOps) {
4102 // Start by selecting the GE condition code for opcodes that return true for
4103 // 'equality'
4104 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4105 CC == ISD::SETULE)
4106 CondCode = ARMCC::GE;
4107
4108 // and GT for opcodes that return false for 'equality'.
4109 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4110 CC == ISD::SETULT)
4111 CondCode = ARMCC::GT;
4112
4113 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4114 // to swap the compare operands.
4115 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4116 CC == ISD::SETULT)
4117 swpCmpOps = true;
4118
4119 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4120 // If we have an unordered opcode, we need to swap the operands to the VSEL
4121 // instruction (effectively negating the condition).
4122 //
4123 // This also has the effect of swapping which one of 'less' or 'greater'
4124 // returns true, so we also swap the compare operands. It also switches
4125 // whether we return true for 'equality', so we compensate by picking the
4126 // opposite condition code to our original choice.
4127 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4128 CC == ISD::SETUGT) {
4129 swpCmpOps = !swpCmpOps;
4130 swpVselOps = !swpVselOps;
4131 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4132 }
4133
4134 // 'ordered' is 'anything but unordered', so use the VS condition code and
4135 // swap the VSEL operands.
4136 if (CC == ISD::SETO) {
4137 CondCode = ARMCC::VS;
4138 swpVselOps = true;
4139 }
4140
4141 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4142 // code and swap the VSEL operands.
4143 if (CC == ISD::SETUNE) {
4144 CondCode = ARMCC::EQ;
4145 swpVselOps = true;
4146 }
4147}
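(Editorial aside, not part of the annotated file: a minimal C++ sketch of the two rewrites checkVSELConstraints applies, shown on plain floats. The helper names vselLT and vselULE are hypothetical.)

inline float vselLT(float a, float b, float x, float y) {
  // Illustrative only: "a < b" is rewritten as "b > a" by swapping the
  // compare operands, keeping a GT condition that VSEL can encode.
  return (b > a) ? x : y;
}
inline float vselULE(float a, float b, float x, float y) {
  // Illustrative only: an unordered-or-less-equal test is handled by negating
  // the ordered ">" test, which is done by swapping the VSEL operands.
  return (a > b) ? y : x;     // == (a <= b || unordered) ? x : y
}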
4148
4149SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4150 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4151 SDValue Cmp, SelectionDAG &DAG) const {
4152 if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4153 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4154 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4155 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4156 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4157
4158 SDValue TrueLow = TrueVal.getValue(0);
4159 SDValue TrueHigh = TrueVal.getValue(1);
4160 SDValue FalseLow = FalseVal.getValue(0);
4161 SDValue FalseHigh = FalseVal.getValue(1);
4162
4163 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4164 ARMcc, CCR, Cmp);
4165 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4166 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4167
4168 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4169 } else {
4170 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4171 Cmp);
4172 }
4173}
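(Editorial aside, not part of the annotated file: a minimal C++ sketch of the f64 CMOV splitting done above when only single-precision registers are available. The helper name cmovF64AsTwoWords is hypothetical and works on raw 64-bit bit patterns.)

inline unsigned long long cmovF64AsTwoWords(bool c, unsigned long long t,
                                            unsigned long long f) {
  // Illustrative only: the f64 select is performed as two independent 32-bit
  // selects on the same condition, then the halves are glued back together
  // (the VMOVRRD / VMOVDRR round trip above).
  unsigned lo = c ? (unsigned)t : (unsigned)f;
  unsigned hi = c ? (unsigned)(t >> 32) : (unsigned)(f >> 32);
  return ((unsigned long long)hi << 32) | lo;
}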
4174
4175static bool isGTorGE(ISD::CondCode CC) {
4176 return CC == ISD::SETGT || CC == ISD::SETGE;
4177}
4178
4179static bool isLTorLE(ISD::CondCode CC) {
4180 return CC == ISD::SETLT || CC == ISD::SETLE;
4181}
4182
4183// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4184// All of these conditions (and their <= and >= counterparts) will do:
4185// x < k ? k : x
4186// x > k ? x : k
4187// k < x ? x : k
4188// k > x ? k : x
4189static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4190 const SDValue TrueVal, const SDValue FalseVal,
4191 const ISD::CondCode CC, const SDValue K) {
4192 return (isGTorGE(CC) &&
4193 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4194 (isLTorLE(CC) &&
4195 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4196}
4197
4198// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4199static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4200 const SDValue TrueVal, const SDValue FalseVal,
4201 const ISD::CondCode CC, const SDValue K) {
4202 return (isGTorGE(CC) &&
4203 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4204 (isLTorLE(CC) &&
4205 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4206}
4207
4208// Check if two chained conditionals could be converted into SSAT.
4209//
4210// SSAT can replace a set of two conditional selectors that bound a number to an
4211// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4212//
4213// x < -k ? -k : (x > k ? k : x)
4214// x < -k ? -k : (x < k ? x : k)
4215// x > -k ? (x > k ? k : x) : -k
4216// x < k ? (x < -k ? -k : x) : k
4217// etc.
4218//
4219// It returns true if the conversion can be done, false otherwise.
4220// Additionally, the variable is returned in parameter V and the constant in K.
4221static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4222 uint64_t &K) {
4223 SDValue LHS1 = Op.getOperand(0);
4224 SDValue RHS1 = Op.getOperand(1);
4225 SDValue TrueVal1 = Op.getOperand(2);
4226 SDValue FalseVal1 = Op.getOperand(3);
4227 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4228
4229 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4230 if (Op2.getOpcode() != ISD::SELECT_CC)
4231 return false;
4232
4233 SDValue LHS2 = Op2.getOperand(0);
4234 SDValue RHS2 = Op2.getOperand(1);
4235 SDValue TrueVal2 = Op2.getOperand(2);
4236 SDValue FalseVal2 = Op2.getOperand(3);
4237 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4238
4239 // Find out which are the constants and which are the variables
4240 // in each conditional
4241 SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4242 ? &RHS1
4243 : nullptr;
4244 SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4245 ? &RHS2
4246 : nullptr;
4247 SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4248 SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4249 SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4250 SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4251
4252 // We must detect cases where the original operations worked with 16- or
4253 // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
4254 // must work with sign-extended values but the select operations return
4255 // the original non-extended value.
4256 SDValue V2TmpReg = V2Tmp;
4257 if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4258 V2TmpReg = V2Tmp->getOperand(0);
4259
4260 // Check that the registers and the constants have the correct values
4261 // in both conditionals
4262 if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4263 V2TmpReg != V2)
4264 return false;
4265
4266 // Figure out which conditional is saturating the lower/upper bound.
4267 const SDValue *LowerCheckOp =
4268 isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4269 ? &Op
4270 : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4271 ? &Op2
4272 : nullptr;
4273 const SDValue *UpperCheckOp =
4274 isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4275 ? &Op
4276 : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4277 ? &Op2
4278 : nullptr;
4279
4280 if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4281 return false;
4282
4283 // Check that the constant in the lower-bound check is
4284 // the opposite of the constant in the upper-bound check
4285 // in 1's complement.
4286 int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4287 int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4288 int64_t PosVal = std::max(Val1, Val2);
4289
4290 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4291 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4292 Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4293
4294 V = V2;
4295 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4296 return true;
4297 }
4298
4299 return false;
4300}
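(Editorial aside, not part of the annotated file: a minimal C++ sketch of the clamp pattern isSaturatingConditional recognizes. The helper name clampForSSAT is hypothetical.)

inline int clampForSSAT(int x, int k) {
  // Illustrative only: the pattern matched above is a two-sided clamp of x to
  // the range bounded by ~k and k, with k + 1 a power of two. SSAT performs
  // exactly this signed saturation in a single instruction.
  int lo = ~k;                       // one's complement of the upper bound
  return x < lo ? lo : (x > k ? k : x);
}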
4301
4302SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4303 EVT VT = Op.getValueType();
4304 SDLoc dl(Op);
4305
4306 // Try to convert two saturating conditional selects into a single SSAT
4307 SDValue SatValue;
4308 uint64_t SatConstant;
4309 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4310 isSaturatingConditional(Op, SatValue, SatConstant))
4311 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4312 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4313
4314 SDValue LHS = Op.getOperand(0);
4315 SDValue RHS = Op.getOperand(1);
4316 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4317 SDValue TrueVal = Op.getOperand(2);
4318 SDValue FalseVal = Op.getOperand(3);
4319
4320 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4321 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4322 dl);
4323
4324 // If softenSetCCOperands only returned one value, we should compare it to
4325 // zero.
4326 if (!RHS.getNode()) {
4327 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4328 CC = ISD::SETNE;
4329 }
4330 }
4331
4332 if (LHS.getValueType() == MVT::i32) {
4333 // Try to generate VSEL on ARMv8.
4334 // The VSEL instruction can't use all the usual ARM condition
4335 // codes: it only has two bits to select the condition code, so it's
4336 // constrained to use only GE, GT, VS and EQ.
4337 //
4338 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4339 // swap the operands of the previous compare instruction (effectively
4340 // inverting the compare condition, swapping 'less' and 'greater') and
4341 // sometimes need to swap the operands to the VSEL (which inverts the
4342 // condition in the sense of firing whenever the previous condition didn't)
4343 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4344 TrueVal.getValueType() == MVT::f64)) {
4345 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4346 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4347 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4348 CC = ISD::getSetCCInverse(CC, true);
4349 std::swap(TrueVal, FalseVal);
4350 }
4351 }
4352
4353 SDValue ARMcc;
4354 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4355 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4356 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4357 }
4358
4359 ARMCC::CondCodes CondCode, CondCode2;
4360 bool InvalidOnQNaN;
4361 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4362
4363 // Try to generate VMAXNM/VMINNM on ARMv8.
4364 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4365 TrueVal.getValueType() == MVT::f64)) {
4366 bool swpCmpOps = false;
4367 bool swpVselOps = false;
4368 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4369
4370 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4371 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4372 if (swpCmpOps)
4373 std::swap(LHS, RHS);
4374 if (swpVselOps)
4375 std::swap(TrueVal, FalseVal);
4376 }
4377 }
4378
4379 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4380 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4381 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4382 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4383 if (CondCode2 != ARMCC::AL) {
4384 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4385 // FIXME: Needs another CMP because flag can have but one use.
4386 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4387 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4388 }
4389 return Result;
4390}
4391
4392/// canChangeToInt - Given the fp compare operand, return true if it is suitable
4393/// to morph to an integer compare sequence.
4394static bool canChangeToInt(SDValue Op, bool &SeenZero,
4395 const ARMSubtarget *Subtarget) {
4396 SDNode *N = Op.getNode();
4397 if (!N->hasOneUse())
4398 // Otherwise it requires moving the value from fp to integer registers.
4399 return false;
4400 if (!N->getNumValues())
4401 return false;
4402 EVT VT = Op.getValueType();
4403 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4404 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4405 // vmrs are very slow, e.g. cortex-a8.
4406 return false;
4407
4408 if (isFloatingPointZero(Op)) {
4409 SeenZero = true;
4410 return true;
4411 }
4412 return ISD::isNormalLoad(N);
4413}
4414
4415static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4416 if (isFloatingPointZero(Op))
4417 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4418
4419 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4420 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4421 Ld->getPointerInfo(), Ld->getAlignment(),
4422 Ld->getMemOperand()->getFlags());
4423
4424 llvm_unreachable("Unknown VFP cmp argument!");
4425}
4426
4427static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4428 SDValue &RetVal1, SDValue &RetVal2) {
4429 SDLoc dl(Op);
4430
4431 if (isFloatingPointZero(Op)) {
4432 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4433 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4434 return;
4435 }
4436
4437 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4438 SDValue Ptr = Ld->getBasePtr();
4439 RetVal1 =
4440 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4441 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4442
4443 EVT PtrType = Ptr.getValueType();
4444 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4445 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4446 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4447 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4448 Ld->getPointerInfo().getWithOffset(4), NewAlign,
4449 Ld->getMemOperand()->getFlags());
4450 return;
4451 }
4452
4453 llvm_unreachable("Unknown VFP cmp argument!");
4454}
4455
4456/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4457/// f32 and even f64 comparisons to integer ones.
4458SDValue
4459ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4460 SDValue Chain = Op.getOperand(0);
4461 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4462 SDValue LHS = Op.getOperand(2);
4463 SDValue RHS = Op.getOperand(3);
4464 SDValue Dest = Op.getOperand(4);
4465 SDLoc dl(Op);
4466
4467 bool LHSSeenZero = false;
4468 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4469 bool RHSSeenZero = false;
4470 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4471 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4472 // If unsafe fp math optimization is enabled and there are no other uses of
4473 // the CMP operands, and the condition code is EQ or NE, we can optimize it
4474 // to an integer comparison.
4475 if (CC == ISD::SETOEQ)
4476 CC = ISD::SETEQ;
4477 else if (CC == ISD::SETUNE)
4478 CC = ISD::SETNE;
4479
4480 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4481 SDValue ARMcc;
4482 if (LHS.getValueType() == MVT::f32) {
4483 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4484 bitcastf32Toi32(LHS, DAG), Mask);
4485 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4486 bitcastf32Toi32(RHS, DAG), Mask);
4487 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4488 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4489 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4490 Chain, Dest, ARMcc, CCR, Cmp);
4491 }
4492
4493 SDValue LHS1, LHS2;
4494 SDValue RHS1, RHS2;
4495 expandf64Toi32(LHS, DAG, LHS1, LHS2);
4496 expandf64Toi32(RHS, DAG, RHS1, RHS2);
4497 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4498 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4499 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4500 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4501 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4502 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4503 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4504 }
4505
4506 return SDValue();
4507}
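(Editorial aside, not part of the annotated file: a minimal C++ sketch of the sign-bit masking used above. The helper name f32IsZeroByBits is hypothetical and takes the raw f32 bit pattern as input.)

inline bool f32IsZeroByBits(unsigned bits) {
  // Illustrative only: once the sign bit is cleared, an equality test against
  // +/-0.0 can be done with a plain integer compare, which is what the
  // masked integer CMP built above performs.
  return (bits & 0x7fffffffu) == 0;   // true for both +0.0 and -0.0
}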
4508
4509SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4510 SDValue Chain = Op.getOperand(0);
4511 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4512 SDValue LHS = Op.getOperand(2);
4513 SDValue RHS = Op.getOperand(3);
4514 SDValue Dest = Op.getOperand(4);
4515 SDLoc dl(Op);
4516
4517 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4518 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4519 dl);
4520
4521 // If softenSetCCOperands only returned one value, we should compare it to
4522 // zero.
4523 if (!RHS.getNode()) {
4524 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4525 CC = ISD::SETNE;
4526 }
4527 }
4528
4529 if (LHS.getValueType() == MVT::i32) {
4530 SDValue ARMcc;
4531 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4532 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4533 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4534 Chain, Dest, ARMcc, CCR, Cmp);
4535 }
4536
4537 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4538
4539 if (getTargetMachine().Options.UnsafeFPMath &&
4540 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4541 CC == ISD::SETNE || CC == ISD::SETUNE)) {
4542 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4543 return Result;
4544 }
4545
4546 ARMCC::CondCodes CondCode, CondCode2;
4547 bool InvalidOnQNaN;
4548 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4549
4550 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4551 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4552 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4553 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4554 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4555 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4556 if (CondCode2 != ARMCC::AL) {
4557 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4558 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4559 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4560 }
4561 return Res;
4562}
4563
4564SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4565 SDValue Chain = Op.getOperand(0);
4566 SDValue Table = Op.getOperand(1);
4567 SDValue Index = Op.getOperand(2);
4568 SDLoc dl(Op);
4569
4570 EVT PTy = getPointerTy(DAG.getDataLayout());
4571 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4572 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4573 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4574 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4575 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
4576 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4577 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
4578 // which does another jump to the destination. This also makes it easier
4579 // to translate it to TBB / TBH later (Thumb2 only).
4580 // FIXME: This might not work if the function is extremely large.
4581 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4582 Addr, Op.getOperand(2), JTI);
4583 }
4584 if (isPositionIndependent() || Subtarget->isROPI()) {
4585 Addr =
4586 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4587 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4588 Chain = Addr.getValue(1);
4589 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
4590 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4591 } else {
4592 Addr =
4593 DAG.getLoad(PTy, dl, Chain, Addr,
4594 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4595 Chain = Addr.getValue(1);
4596 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4597 }
4598}
4599
4600static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4601 EVT VT = Op.getValueType();
4602 SDLoc dl(Op);
4603
4604 if (Op.getValueType().getVectorElementType() == MVT::i32) {
4605 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4606 return Op;
4607 return DAG.UnrollVectorOp(Op.getNode());
4608 }
4609
4610 assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
4611 "Invalid type for custom lowering!");
4612 if (VT != MVT::v4i16)
4613 return DAG.UnrollVectorOp(Op.getNode());
4614
4615 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
4616 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4617}
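(Editorial aside, not part of the annotated file: a minimal scalar C++ sketch of the v4f32 -> v4i16 path above, signed case only. The helper name fpToI16ViaI32 is hypothetical.)

inline short fpToI16ViaI32(float f) {
  // Illustrative only: convert to a 32-bit integer first, then truncate to
  // 16 bits, since there is no direct f32 -> i16 conversion at this level.
  // Assumes the value fits in the destination type.
  int wide = (int)f;        // the FP_TO_SINT step on v4i32
  return (short)wide;       // the ISD::TRUNCATE step down to v4i16
}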
4618
4619SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4620 EVT VT = Op.getValueType();
4621 if (VT.isVector())
4622 return LowerVectorFP_TO_INT(Op, DAG);
4623 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4624 RTLIB::Libcall LC;
4625 if (Op.getOpcode() == ISD::FP_TO_SINT)
4626 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4627 Op.getValueType());
4628 else
4629 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4630 Op.getValueType());
4631 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4632 /*isSigned*/ false, SDLoc(Op)).first;
4633 }
4634
4635 return Op;
4636}
4637
4638static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4639 EVT VT = Op.getValueType();
4640 SDLoc dl(Op);
4641
4642 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4643 if (VT.getVectorElementType() == MVT::f32)
4644 return Op;
4645 return DAG.UnrollVectorOp(Op.getNode());
4646 }
4647
4648 assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
4649 "Invalid type for custom lowering!");
4650 if (VT != MVT::v4f32)
4651 return DAG.UnrollVectorOp(Op.getNode());
4652
4653 unsigned CastOpc;
4654 unsigned Opc;
4655 switch (Op.getOpcode()) {
4656 default: llvm_unreachable("Invalid opcode!");
4657 case ISD::SINT_TO_FP:
4658 CastOpc = ISD::SIGN_EXTEND;
4659 Opc = ISD::SINT_TO_FP;
4660 break;
4661 case ISD::UINT_TO_FP:
4662 CastOpc = ISD::ZERO_EXTEND;
4663 Opc = ISD::UINT_TO_FP;
4664 break;
4665 }
4666
4667 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
4668 return DAG.getNode(Opc, dl, VT, Op);
4669}
4670
4671SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4672 EVT VT = Op.getValueType();
4673 if (VT.isVector())
4674 return LowerVectorINT_TO_FP(Op, DAG);
4675 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4676 RTLIB::Libcall LC;
4677 if (Op.getOpcode() == ISD::SINT_TO_FP)
4678 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4679 Op.getValueType());
4680 else
4681 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4682 Op.getValueType());
4683 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4684 /*isSigned*/ false, SDLoc(Op)).first;
4685 }
4686
4687 return Op;
4688}
4689
4690SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4691 // Implement fcopysign with a fabs and a conditional fneg.
4692 SDValue Tmp0 = Op.getOperand(0);
4693 SDValue Tmp1 = Op.getOperand(1);
4694 SDLoc dl(Op);
4695 EVT VT = Op.getValueType();
4696 EVT SrcVT = Tmp1.getValueType();
4697 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4698 Tmp0.getOpcode() == ARMISD::VMOVDRR;
4699 bool UseNEON = !InGPR && Subtarget->hasNEON();
4700
4701 if (UseNEON) {
4702 // Use VBSL to copy the sign bit.
4703 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4704 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4705 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4706 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4707 if (VT == MVT::f64)
4708 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4709 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4710 DAG.getConstant(32, dl, MVT::i32));
4711 else /*if (VT == MVT::f32)*/
4712 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4713 if (SrcVT == MVT::f32) {
4714 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4715 if (VT == MVT::f64)
4716 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4717 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4718 DAG.getConstant(32, dl, MVT::i32));
4719 } else if (VT == MVT::f32)
4720 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4721 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4722 DAG.getConstant(32, dl, MVT::i32));
4723 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4724 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4725
4726 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4727 dl, MVT::i32);
4728 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4729 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4730 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4731
4732 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4733 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4734 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4735 if (VT == MVT::f32) {
4736 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4737 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4738 DAG.getConstant(0, dl, MVT::i32));
4739 } else {
4740 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4741 }
4742
4743 return Res;
4744 }
4745
4746 // Bitcast operand 1 to i32.
4747 if (SrcVT == MVT::f64)
4748 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4749 Tmp1).getValue(1);
4750 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4751
4752 // Or in the signbit with integer operations.
4753 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4754 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4755 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4756 if (VT == MVT::f32) {
4757 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4758 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4759 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4760 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4761 }
4762
4763 // f64: Or the high part with signbit and then combine two parts.
4764 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4765 Tmp0);
4766 SDValue Lo = Tmp0.getValue(0);
4767 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4768 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4769 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4770}
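(Editorial aside, not part of the annotated file: a minimal C++ sketch of the integer fallback used above for fcopysign. The helper name copysignF32Bits is hypothetical and operates on raw f32 bit patterns.)

inline unsigned copysignF32Bits(unsigned magBits, unsigned signBits) {
  // Illustrative only: keep the magnitude bits of the first operand and OR
  // in the sign bit of the second, exactly the Mask1/Mask2 combination above.
  return (magBits & 0x7fffffffu) | (signBits & 0x80000000u);
}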
4771
4772SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4773 MachineFunction &MF = DAG.getMachineFunction();
4774 MachineFrameInfo &MFI = MF.getFrameInfo();
4775 MFI.setReturnAddressIsTaken(true);
4776
4777 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4778 return SDValue();
4779
4780 EVT VT = Op.getValueType();
4781 SDLoc dl(Op);
4782 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4783 if (Depth) {
4784 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4785 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4786 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4787 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4788 MachinePointerInfo());
4789 }
4790
4791 // Return LR, which contains the return address. Mark it an implicit live-in.
4792 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4793 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4794}
4795
4796SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4797 const ARMBaseRegisterInfo &ARI =
4798 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4799 MachineFunction &MF = DAG.getMachineFunction();
4800 MachineFrameInfo &MFI = MF.getFrameInfo();
4801 MFI.setFrameAddressIsTaken(true);
4802
4803 EVT VT = Op.getValueType();
4804 SDLoc dl(Op); // FIXME probably not meaningful
4805 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4806 unsigned FrameReg = ARI.getFrameRegister(MF);
4807 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4808 while (Depth--)
4809 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4810 MachinePointerInfo());
4811 return FrameAddr;
4812}
4813
4814// FIXME? Maybe this could be a TableGen attribute on some registers and
4815// this table could be generated automatically from RegInfo.
4816unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4817 SelectionDAG &DAG) const {
4818 unsigned Reg = StringSwitch<unsigned>(RegName)
4819 .Case("sp", ARM::SP)
4820 .Default(0);
4821 if (Reg)
4822 return Reg;
4823 report_fatal_error(Twine("Invalid register name \""
4824 + StringRef(RegName) + "\"."));
4825}
4826
4827// Result is 64 bit value so split into two 32 bit values and return as a
4828// pair of values.
4829static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4830 SelectionDAG &DAG) {
4831 SDLoc DL(N);
4832
4833 // This function is only supposed to be called for i64 type destination.
4834 assert(N->getValueType(0) == MVT::i64
4835 && "ExpandREAD_REGISTER called for non-i64 type result.");
4836
4837 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4838 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4839 N->getOperand(0),
4840 N->getOperand(1));
4841
4842 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4843 Read.getValue(1)));
4844 Results.push_back(Read.getOperand(0));
4845}
4846
4847/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
4848/// When \p DstVT, the destination type of \p BC, is on the vector
4849/// register bank and the source of bitcast, \p Op, operates on the same bank,
4850/// it might be possible to combine them, such that everything stays on the
4851/// vector register bank.
4852/// \returns The node that would replace \p BC, if the combine
4853/// is possible.
4854static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
4855 SelectionDAG &DAG) {
4856 SDValue Op = BC->getOperand(0);
4857 EVT DstVT = BC->getValueType(0);
4858
4859 // The only vector instruction that can produce a scalar (remember,
4860 // since the bitcast was about to be turned into VMOVDRR, the source
4861 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
4862 // Moreover, we can do this combine only if there is one use.
4863 // Finally, if the destination type is not a vector, there is not
4864 // much point on forcing everything on the vector bank.
4865 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4866 !Op.hasOneUse())
4867 return SDValue();
4868
4869 // If the index is not constant, we will introduce an additional
4870 // multiply that will stick.
4871 // Give up in that case.
4872 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4873 if (!Index)
4874 return SDValue();
4875 unsigned DstNumElt = DstVT.getVectorNumElements();
4876
4877 // Compute the new index.
4878 const APInt &APIntIndex = Index->getAPIntValue();
4879 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
4880 NewIndex *= APIntIndex;
4881 // Check if the new constant index fits into i32.
4882 if (NewIndex.getBitWidth() > 32)
4883 return SDValue();
4884
4885 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
4886 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
4887 SDLoc dl(Op);
4888 SDValue ExtractSrc = Op.getOperand(0);
4889 EVT VecVT = EVT::getVectorVT(
4890 *DAG.getContext(), DstVT.getScalarType(),
4891 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
4892 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
4893 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
4894 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
4895}
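(Editorial aside, not part of the annotated file: a minimal C++ sketch of the index rewrite used above. The helper name subvectorStartIndex is hypothetical.)

inline unsigned subvectorStartIndex(unsigned eltIndex, unsigned dstNumElts) {
  // Illustrative only: extracting i64 element 'eltIndex' and bitcasting it to
  // a vector of dstNumElts narrow elements is the same as bitcasting the
  // whole source vector and taking the subvector starting at this index.
  return eltIndex * dstNumElts;   // mirrors NewIndex = Index * DstNumElt
}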
4896
4897/// ExpandBITCAST - If the target supports VFP, this function is called to
4898/// expand a bit convert where either the source or destination type is i64 to
4899/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4900/// operand type is illegal (e.g., v2f32 for a target that doesn't support
4901/// vectors), since the legalizer won't know what to do with that.
4902static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4903 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4904 SDLoc dl(N);
4905 SDValue Op = N->getOperand(0);
4906
4907 // This function is only supposed to be called for i64 types, either as the
4908 // source or destination of the bit convert.
4909 EVT SrcVT = Op.getValueType();
4910 EVT DstVT = N->getValueType(0);
4911 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4912 "ExpandBITCAST called for non-i64 type");
4913
4914 // Turn i64->f64 into VMOVDRR.
4915 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4916 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
4917 // if we can combine the bitcast with its source.
4918 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
4919 return Val;
4920
4921 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4922 DAG.getConstant(0, dl, MVT::i32));
4923 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4924 DAG.getConstant(1, dl, MVT::i32));
4925 return DAG.getNode(ISD::BITCAST, dl, DstVT,
4926 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4927 }
4928
4929 // Turn f64->i64 into VMOVRRD.
4930 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4931 SDValue Cvt;
4932 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4933 SrcVT.getVectorNumElements() > 1)
4934 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4935 DAG.getVTList(MVT::i32, MVT::i32),
4936 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4937 else
4938 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4939 DAG.getVTList(MVT::i32, MVT::i32), Op);
4940 // Merge the pieces into a single i64 value.
4941 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4942 }
4943
4944 return SDValue();
4945}
4946
4947/// getZeroVector - Returns a vector of specified type with all zero elements.
4948/// Zero vectors are used to represent vector negation and in those cases
4949/// will be implemented with the NEON VNEG instruction. However, VNEG does
4950/// not support i64 elements, so sometimes the zero vectors will need to be
4951/// explicitly constructed. Regardless, use a canonical VMOV to create the
4952/// zero vector.
4953static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4954 assert(VT.isVector() && "Expected a vector type");
4955 // The canonical modified immediate encoding of a zero vector is....0!
4956 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4957 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4958 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4959 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4960}
4961
4962/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4963/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4964SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4965 SelectionDAG &DAG) const {
4966 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4967 EVT VT = Op.getValueType();
4968 unsigned VTBits = VT.getSizeInBits();
4969 SDLoc dl(Op);
4970 SDValue ShOpLo = Op.getOperand(0);
4971 SDValue ShOpHi = Op.getOperand(1);
4972 SDValue ShAmt = Op.getOperand(2);
4973 SDValue ARMcc;
4974 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4975 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4976
4977 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4978
4979 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4980 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4981 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4982 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4983 DAG.getConstant(VTBits, dl, MVT::i32));
4984 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4985 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4986 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4987 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4988 ISD::SETGE, ARMcc, DAG, dl);
4989 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
4990 ARMcc, CCR, CmpLo);
4991
4992 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4993 SDValue HiBigShift = Opc == ISD::SRA
4994 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4995 DAG.getConstant(VTBits - 1, dl, VT))
4996 : DAG.getConstant(0, dl, VT);
4997 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4998 ISD::SETGE, ARMcc, DAG, dl);
4999 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5000 ARMcc, CCR, CmpHi);
5001
5002 SDValue Ops[2] = { Lo, Hi };
5003 return DAG.getMergeValues(Ops, dl);
5004}
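// A sketch of what LowerShiftRightParts above produces for a 64-bit SRL_PARTS
// with 32-bit parts (VTBits == 32): when ShAmt < 32 the low result is
// (Lo >> ShAmt) | (Hi << (32 - ShAmt)) and the high result is Hi >> ShAmt;
// when ShAmt >= 32, the "big shift" values Hi >> (ShAmt - 32) and 0 (or the
// sign bits for SRA) are selected instead, the choice being made by the two
// CMOVs on the sign of ExtraShAmt = ShAmt - 32.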
5005
5006/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
5007 /// i32 values and takes a 2 x i32 value to shift plus a shift amount.
5008SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
5009 SelectionDAG &DAG) const {
5010  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
5011 EVT VT = Op.getValueType();
5012 unsigned VTBits = VT.getSizeInBits();
5013 SDLoc dl(Op);
5014 SDValue ShOpLo = Op.getOperand(0);
5015 SDValue ShOpHi = Op.getOperand(1);
5016 SDValue ShAmt = Op.getOperand(2);
5017 SDValue ARMcc;
5018 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5019
5020  assert(Op.getOpcode() == ISD::SHL_PARTS);
5021 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
5022 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
5023 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
5024 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
5025 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
5026
5027 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
5028 DAG.getConstant(VTBits, dl, MVT::i32));
5029 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
5030 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5031 ISD::SETGE, ARMcc, DAG, dl);
5032 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
5033 ARMcc, CCR, CmpHi);
5034
5035 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
5036 ISD::SETGE, ARMcc, DAG, dl);
5037 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
5038 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
5039 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
5040
5041 SDValue Ops[2] = { Lo, Hi };
5042 return DAG.getMergeValues(Ops, dl);
5043}
5044
5045SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
5046 SelectionDAG &DAG) const {
5047 // The rounding mode is in bits 23:22 of the FPSCR.
5048  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
5049  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
5050  // so that the shift and the AND get folded into a bitfield extract.
5051 SDLoc dl(Op);
5052 SDValue Ops[] = { DAG.getEntryNode(),
5053 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
5054
5055 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
5056 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
5057 DAG.getConstant(1U << 22, dl, MVT::i32));
5058 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
5059 DAG.getConstant(22, dl, MVT::i32));
5060 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
5061 DAG.getConstant(3, dl, MVT::i32));
5062}
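// Worked example for LowerFLT_ROUNDS_ above: if FPSCR bits 23:22 hold 0b01
// (ARM rounding mode 1), adding 1 << 22 leaves 0b10 in those bits, and the
// shift right by 22 followed by the mask with 3 produces 2, matching the
// 1->2 entry of the mapping; for mode 3 the add carries out of bit 23 and
// the mask yields 0, matching 3->0.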
5063
5064static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
5065 const ARMSubtarget *ST) {
5066 SDLoc dl(N);
5067 EVT VT = N->getValueType(0);
5068 if (VT.isVector()) {
5069    assert(ST->hasNEON());
5070
5071 // Compute the least significant set bit: LSB = X & -X
5072 SDValue X = N->getOperand(0);
5073 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
5074 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
5075
5076 EVT ElemTy = VT.getVectorElementType();
5077
5078 if (ElemTy == MVT::i8) {
5079 // Compute with: cttz(x) = ctpop(lsb - 1)
5080 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5081 DAG.getTargetConstant(1, dl, ElemTy));
5082 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5083 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5084 }
5085
5086 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5087 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5088 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5089 unsigned NumBits = ElemTy.getSizeInBits();
5090 SDValue WidthMinus1 =
5091 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5092 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5093 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5094 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5095 }
5096
5097 // Compute with: cttz(x) = ctpop(lsb - 1)
5098
5099 // Since we can only compute the number of bits in a byte with vcnt.8, we
5100 // have to gather the result with pairwise addition (vpaddl) for i16, i32,
5101 // and i64.
5102
5103 // Compute LSB - 1.
5104 SDValue Bits;
5105 if (ElemTy == MVT::i64) {
5106 // Load constant 0xffff'ffff'ffff'ffff to register.
5107 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5108 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
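      // Note: 0x1eff is the already-encoded NEON modified immediate
      // (Op=1, Cmode=1110, per-byte mask 0xff), which is the encoding the
      // 64-bit case of isNEONModifiedImm below produces for an all-ones
      // 64-bit splat.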
5109 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5110 } else {
5111 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5112 DAG.getTargetConstant(1, dl, ElemTy));
5113 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5114 }
5115
5116 // Count #bits with vcnt.8.
5117 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5118 SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
5119 SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
5120
5121  // Gather the #bits with vpaddl (pairwise add).
5122 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5123 SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
5124 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5125 Cnt8);
5126 if (ElemTy == MVT::i16)
5127 return Cnt16;
5128
5129 EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
5130 SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
5131 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5132 Cnt16);
5133 if (ElemTy == MVT::i32)
5134 return Cnt32;
5135
5136    assert(ElemTy == MVT::i64);
5137 SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5138 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5139 Cnt32);
5140 return Cnt64;
5141 }
5142
5143 if (!ST->hasV6T2Ops())
5144 return SDValue();
5145
5146 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5147 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5148}
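// Worked example for the vector path in LowerCTTZ above, per i8 lane:
// for x = 0x64 (0b01100100), LSB = x & -x = 0b00000100, LSB - 1 = 0b011,
// and ctpop(0b011) = 2 == cttz(x). The scalar fallback instead computes
// cttz(x) as ctlz(bitreverse(x)), which selects to RBIT + CLZ on v6T2.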
5149
5150/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
5151 /// for each 16-bit element from the operand, repeated. The basic idea is to
5152/// leverage vcnt to get the 8-bit counts, gather and add the results.
5153///
5154/// Trace for v4i16:
5155/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5156/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
5157/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
5158/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
5159/// [b0 b1 b2 b3 b4 b5 b6 b7]
5160/// +[b1 b0 b3 b2 b5 b4 b7 b6]
5161/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
5162/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
5163static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
5164 EVT VT = N->getValueType(0);
5165 SDLoc DL(N);
5166
5167 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5168 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
5169 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
5170 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
5171 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
5172 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
5173}
5174
5175/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
5176/// bit-count for each 16-bit element from the operand. We need slightly
5177/// different sequencing for v4i16 and v8i16 to stay within NEON's available
5178/// 64/128-bit registers.
5179///
5180/// Trace for v4i16:
5181/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5182/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
5183/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
5184/// v4i16:Extracted = [k0 k1 k2 k3 ]
5185static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
5186 EVT VT = N->getValueType(0);
5187 SDLoc DL(N);
5188
5189 SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
5190 if (VT.is64BitVector()) {
5191 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
5192 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
5193 DAG.getIntPtrConstant(0, DL));
5194 } else {
5195 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
5196 BitCounts, DAG.getIntPtrConstant(0, DL));
5197 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
5198 }
5199}
5200
5201/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
5202/// bit-count for each 32-bit element from the operand. The idea here is
5203/// to split the vector into 16-bit elements, leverage the 16-bit count
5204/// routine, and then combine the results.
5205///
5206/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
5207/// input = [v0 v1 ] (vi: 32-bit elements)
5208/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
5209/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
5210/// vrev: N0 = [k1 k0 k3 k2 ]
5211/// [k0 k1 k2 k3 ]
5212/// N1 =+[k1 k0 k3 k2 ]
5213/// [k0 k2 k1 k3 ]
5214/// N2 =+[k1 k3 k0 k2 ]
5215/// [k0 k2 k1 k3 ]
5216/// Extended =+[k1 k3 k0 k2 ]
5217/// [k0 k2 ]
5218/// Extracted=+[k1 k3 ]
5219///
5220static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
5221 EVT VT = N->getValueType(0);
5222 SDLoc DL(N);
5223
5224 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5225
5226 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
5227 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
5228 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
5229 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
5230 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
5231
5232 if (VT.is64BitVector()) {
5233 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
5234 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
5235 DAG.getIntPtrConstant(0, DL));
5236 } else {
5237 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
5238 DAG.getIntPtrConstant(0, DL));
5239 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
5240 }
5241}
5242
5243static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5244 const ARMSubtarget *ST) {
5245 EVT VT = N->getValueType(0);
5246
5247  assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5248  assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
5249          VT == MVT::v4i16 || VT == MVT::v8i16) &&
5250         "Unexpected type for custom ctpop lowering");
5251
5252 if (VT.getVectorElementType() == MVT::i32)
5253 return lowerCTPOP32BitElements(N, DAG);
5254 else
5255 return lowerCTPOP16BitElements(N, DAG);
5256}
5257
5258static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5259 const ARMSubtarget *ST) {
5260 EVT VT = N->getValueType(0);
5261 SDLoc dl(N);
5262
5263 if (!VT.isVector())
5264 return SDValue();
5265
5266 // Lower vector shifts on NEON to use VSHL.
5267  assert(ST->hasNEON() && "unexpected vector shift");
5268
5269 // Left shifts translate directly to the vshiftu intrinsic.
5270 if (N->getOpcode() == ISD::SHL)
5271 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5272 DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5273 MVT::i32),
5274 N->getOperand(0), N->getOperand(1));
5275
5276  assert((N->getOpcode() == ISD::SRA ||
5277          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5278
5279 // NEON uses the same intrinsics for both left and right shifts. For
5280 // right shifts, the shift amounts are negative, so negate the vector of
5281 // shift amounts.
5282 EVT ShiftVT = N->getOperand(1).getValueType();
5283 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5284 getZeroVector(ShiftVT, DAG, dl),
5285 N->getOperand(1));
5286 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5287 Intrinsic::arm_neon_vshifts :
5288 Intrinsic::arm_neon_vshiftu);
5289 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5290 DAG.getConstant(vshiftInt, dl, MVT::i32),
5291 N->getOperand(0), NegatedCount);
5292}
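// For example, a v4i32 logical shift right by <1, 2, 3, 4> is emitted by
// LowerShift above as an arm_neon_vshiftu intrinsic with the negated counts
// <-1, -2, -3, -4>, since the NEON shift instruction shifts right when the
// per-lane count is negative.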
5293
5294static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5295 const ARMSubtarget *ST) {
5296 EVT VT = N->getValueType(0);
5297 SDLoc dl(N);
5298
5299 // We can get here for a node like i32 = ISD::SHL i32, i64
5300 if (VT != MVT::i64)
5301 return SDValue();
5302
5303  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5304         "Unknown shift to lower!");
5305
5306 // We only lower SRA, SRL of 1 here, all others use generic lowering.
5307 if (!isOneConstant(N->getOperand(1)))
5308 return SDValue();
5309
5310 // If we are in thumb mode, we don't have RRX.
5311 if (ST->isThumb1Only()) return SDValue();
5312
5313 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5314 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5315 DAG.getConstant(0, dl, MVT::i32));
5316 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5317 DAG.getConstant(1, dl, MVT::i32));
5318
5319 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5320 // captures the result into a carry flag.
5321 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5322 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5323
5324 // The low part is an ARMISD::RRX operand, which shifts the carry in.
5325 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5326
5327 // Merge the pieces into a single i64 value.
5328 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5329}
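// Sketch of the RRX expansion above for "i64 srl x, 1": the high word is
// shifted with SRL_FLAG (producing Hi >> 1 and leaving the old bit 0 of Hi
// in the carry flag), the low word becomes RRX(Lo), which shifts the carry
// into bit 31, and the two words are reassembled with BUILD_PAIR.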
5330
5331static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5332 SDValue TmpOp0, TmpOp1;
5333 bool Invert = false;
5334 bool Swap = false;
5335 unsigned Opc = 0;
5336
5337 SDValue Op0 = Op.getOperand(0);
5338 SDValue Op1 = Op.getOperand(1);
5339 SDValue CC = Op.getOperand(2);
5340 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5341 EVT VT = Op.getValueType();
5342 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5343 SDLoc dl(Op);
5344
5345 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5346 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5347 // Special-case integer 64-bit equality comparisons. They aren't legal,
5348 // but they can be lowered with a few vector instructions.
5349 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5350 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5351 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5352 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5353 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5354 DAG.getCondCode(ISD::SETEQ));
5355 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5356 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5357 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5358 if (SetCCOpcode == ISD::SETNE)
5359 Merged = DAG.getNOT(dl, Merged, CmpVT);
5360 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5361 return Merged;
5362 }
5363
5364 if (CmpVT.getVectorElementType() == MVT::i64)
5365 // 64-bit comparisons are not legal in general.
5366 return SDValue();
5367
5368 if (Op1.getValueType().isFloatingPoint()) {
5369 switch (SetCCOpcode) {
5370    default: llvm_unreachable("Illegal FP comparison");
5371 case ISD::SETUNE:
5372    case ISD::SETNE:  Invert = true; LLVM_FALLTHROUGH;
5373 case ISD::SETOEQ:
5374 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5375 case ISD::SETOLT:
5376    case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5377 case ISD::SETOGT:
5378 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5379 case ISD::SETOLE:
5380    case ISD::SETLE:  Swap = true; LLVM_FALLTHROUGH;
5381 case ISD::SETOGE:
5382 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5383    case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5384 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5385    case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5386 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5387    case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5388 case ISD::SETONE:
5389 // Expand this to (OLT | OGT).
5390 TmpOp0 = Op0;
5391 TmpOp1 = Op1;
5392 Opc = ISD::OR;
5393 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5394 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5395 break;
5396 case ISD::SETUO:
5397 Invert = true;
5398      LLVM_FALLTHROUGH;
5399 case ISD::SETO:
5400 // Expand this to (OLT | OGE).
5401 TmpOp0 = Op0;
5402 TmpOp1 = Op1;
5403 Opc = ISD::OR;
5404 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5405 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5406 break;
5407 }
5408 } else {
5409 // Integer comparisons.
5410 switch (SetCCOpcode) {
5411    default: llvm_unreachable("Illegal integer comparison");
5412    case ISD::SETNE:  Invert = true; LLVM_FALLTHROUGH;
5413 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5414    case ISD::SETLT:  Swap = true; LLVM_FALLTHROUGH;
5415 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5416    case ISD::SETLE:  Swap = true; LLVM_FALLTHROUGH;
5417 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5418    case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
5419 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5420    case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
5421 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5422 }
5423
5424 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5425 if (Opc == ARMISD::VCEQ) {
5426 SDValue AndOp;
5427 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5428 AndOp = Op0;
5429 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5430 AndOp = Op1;
5431
5432 // Ignore bitconvert.
5433 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5434 AndOp = AndOp.getOperand(0);
5435
5436 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5437 Opc = ARMISD::VTST;
5438 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5439 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5440 Invert = !Invert;
5441 }
5442 }
5443 }
5444
5445 if (Swap)
5446 std::swap(Op0, Op1);
5447
5448 // If one of the operands is a constant vector zero, attempt to fold the
5449 // comparison to a specialized compare-against-zero form.
5450 SDValue SingleOp;
5451 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5452 SingleOp = Op0;
5453 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5454 if (Opc == ARMISD::VCGE)
5455 Opc = ARMISD::VCLEZ;
5456 else if (Opc == ARMISD::VCGT)
5457 Opc = ARMISD::VCLTZ;
5458 SingleOp = Op1;
5459 }
5460
5461 SDValue Result;
5462 if (SingleOp.getNode()) {
5463 switch (Opc) {
5464 case ARMISD::VCEQ:
5465 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5466 case ARMISD::VCGE:
5467 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5468 case ARMISD::VCLEZ:
5469 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5470 case ARMISD::VCGT:
5471 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5472 case ARMISD::VCLTZ:
5473 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5474 default:
5475 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5476 }
5477 } else {
5478 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5479 }
5480
5481 Result = DAG.getSExtOrTrunc(Result, dl, VT);
5482
5483 if (Invert)
5484 Result = DAG.getNOT(dl, Result, VT);
5485
5486 return Result;
5487}
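// Example for the 64-bit equality special case in LowerVSETCC above: a v2i64
// seteq is bitcast to v4i32, compared element-wise with a 32-bit VCEQ, and
// the result is ANDed with its own VREV64 so that each 64-bit lane is
// all-ones only if both of its 32-bit halves compared equal; setne then
// additionally inverts the merged result.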
5488
5489static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
5490 SDValue LHS = Op.getOperand(0);
5491 SDValue RHS = Op.getOperand(1);
5492 SDValue Carry = Op.getOperand(2);
5493 SDValue Cond = Op.getOperand(3);
5494 SDLoc DL(Op);
5495
5496  assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
5497
5498  assert(Carry.getOpcode() != ISD::CARRY_FALSE);
5499 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5500 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5501
5502 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5503 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5504 SDValue ARMcc = DAG.getConstant(
5505 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5506 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5507 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5508 Cmp.getValue(1), SDValue());
5509 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5510 CCR, Chain.getValue(1));
5511}
5512
5513/// isNEONModifiedImm - Check if the specified splat value corresponds to a
5514/// valid vector constant for a NEON instruction with a "modified immediate"
5515/// operand (e.g., VMOV). If so, return the encoded value.
5516static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5517 unsigned SplatBitSize, SelectionDAG &DAG,
5518 const SDLoc &dl, EVT &VT, bool is128Bits,
5519 NEONModImmType type) {
5520 unsigned OpCmode, Imm;
5521
5522 // SplatBitSize is set to the smallest size that splats the vector, so a
5523 // zero vector will always have SplatBitSize == 8. However, NEON modified
5524  // immediate instructions other than VMOV do not support the 8-bit encoding
5525 // of a zero vector, and the default encoding of zero is supposed to be the
5526 // 32-bit version.
5527 if (SplatBits == 0)
5528 SplatBitSize = 32;
5529
5530 switch (SplatBitSize) {
5531 case 8:
5532 if (type != VMOVModImm)
5533 return SDValue();
5534 // Any 1-byte value is OK. Op=0, Cmode=1110.
5535    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5536 OpCmode = 0xe;
5537 Imm = SplatBits;
5538 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5539 break;
5540
5541 case 16:
5542 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5543 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5544 if ((SplatBits & ~0xff) == 0) {
5545 // Value = 0x00nn: Op=x, Cmode=100x.
5546 OpCmode = 0x8;
5547 Imm = SplatBits;
5548 break;
5549 }
5550 if ((SplatBits & ~0xff00) == 0) {
5551 // Value = 0xnn00: Op=x, Cmode=101x.
5552 OpCmode = 0xa;
5553 Imm = SplatBits >> 8;
5554 break;
5555 }
5556 return SDValue();
5557
5558 case 32:
5559 // NEON's 32-bit VMOV supports splat values where:
5560 // * only one byte is nonzero, or
5561 // * the least significant byte is 0xff and the second byte is nonzero, or
5562 // * the least significant 2 bytes are 0xff and the third is nonzero.
5563 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5564 if ((SplatBits & ~0xff) == 0) {
5565 // Value = 0x000000nn: Op=x, Cmode=000x.
5566 OpCmode = 0;
5567 Imm = SplatBits;
5568 break;
5569 }
5570 if ((SplatBits & ~0xff00) == 0) {
5571 // Value = 0x0000nn00: Op=x, Cmode=001x.
5572 OpCmode = 0x2;
5573 Imm = SplatBits >> 8;
5574 break;
5575 }
5576 if ((SplatBits & ~0xff0000) == 0) {
5577 // Value = 0x00nn0000: Op=x, Cmode=010x.
5578 OpCmode = 0x4;
5579 Imm = SplatBits >> 16;
5580 break;
5581 }
5582 if ((SplatBits & ~0xff000000) == 0) {
5583 // Value = 0xnn000000: Op=x, Cmode=011x.
5584 OpCmode = 0x6;
5585 Imm = SplatBits >> 24;
5586 break;
5587 }
5588
5589 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5590 if (type == OtherModImm) return SDValue();
5591
5592 if ((SplatBits & ~0xffff) == 0 &&
5593 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5594 // Value = 0x0000nnff: Op=x, Cmode=1100.
5595 OpCmode = 0xc;
5596 Imm = SplatBits >> 8;
5597 break;
5598 }
5599
5600 if ((SplatBits & ~0xffffff) == 0 &&
5601 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5602 // Value = 0x00nnffff: Op=x, Cmode=1101.
5603 OpCmode = 0xd;
5604 Imm = SplatBits >> 16;
5605 break;
5606 }
5607
5608 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5609 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5610 // VMOV.I32. A (very) minor optimization would be to replicate the value
5611 // and fall through here to test for a valid 64-bit splat. But, then the
5612 // caller would also need to check and handle the change in size.
5613 return SDValue();
5614
5615 case 64: {
5616 if (type != VMOVModImm)
5617 return SDValue();
5618 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5619 uint64_t BitMask = 0xff;
5620 uint64_t Val = 0;
5621 unsigned ImmMask = 1;
5622 Imm = 0;
5623 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5624 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5625 Val |= BitMask;
5626 Imm |= ImmMask;
5627 } else if ((SplatBits & BitMask) != 0) {
5628 return SDValue();
5629 }
5630 BitMask <<= 8;
5631 ImmMask <<= 1;
5632 }
5633
5634 if (DAG.getDataLayout().isBigEndian())
5635 // swap higher and lower 32 bit word
5636 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5637
5638 // Op=1, Cmode=1110.
5639 OpCmode = 0x1e;
5640 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5641 break;
5642 }
5643
5644 default:
5645    llvm_unreachable("unexpected size for isNEONModifiedImm");
5646 }
5647
5648 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5649 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5650}
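// Worked example for isNEONModifiedImm above: a 16-bit splat of 0x00ab takes
// the "Value = 0x00nn" branch, giving OpCmode = 0x8 and Imm = 0xab, so the
// returned target constant is createNEONModImm(0x8, 0xab) and VT becomes
// v8i16 for a 128-bit vector or v4i16 for a 64-bit one.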
5651
5652SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5653 const ARMSubtarget *ST) const {
5654 bool IsDouble = Op.getValueType() == MVT::f64;
5655 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5656 const APFloat &FPVal = CFP->getValueAPF();
5657
5658 // Prevent floating-point constants from using literal loads
5659 // when execute-only is enabled.
5660 if (ST->genExecuteOnly()) {
5661 APInt INTVal = FPVal.bitcastToAPInt();
5662 SDLoc DL(CFP);
5663 if (IsDouble) {
5664 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5665 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5666 if (!ST->isLittle())
5667 std::swap(Lo, Hi);
5668 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5669 } else {
5670 return DAG.getConstant(INTVal, DL, MVT::i32);
5671 }
5672 }
5673
5674 if (!ST->hasVFP3())
5675 return SDValue();
5676
5677 // Use the default (constant pool) lowering for double constants when we have
5678 // an SP-only FPU
5679 if (IsDouble && Subtarget->isFPOnlySP())
5680 return SDValue();
5681
5682 // Try splatting with a VMOV.f32...
5683 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5684
5685 if (ImmVal != -1) {
5686 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5687 // We have code in place to select a valid ConstantFP already, no need to
5688 // do any mangling.
5689 return Op;
5690 }
5691
5692 // It's a float and we are trying to use NEON operations where
5693 // possible. Lower it to a splat followed by an extract.
5694 SDLoc DL(Op);
5695 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5696 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5697 NewVal);
5698 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5699 DAG.getConstant(0, DL, MVT::i32));
5700 }
5701
5702  // The rest of our options are NEON only; make sure that's allowed before
5703  // proceeding.
5704 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5705 return SDValue();
5706
5707 EVT VMovVT;
5708 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5709
5710 // It wouldn't really be worth bothering for doubles except for one very
5711 // important value, which does happen to match: 0.0. So make sure we don't do
5712 // anything stupid.
5713 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5714 return SDValue();
5715
5716 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5717 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5718 VMovVT, false, VMOVModImm);
5719 if (NewVal != SDValue()) {
5720 SDLoc DL(Op);
5721 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5722 NewVal);
5723 if (IsDouble)
5724 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5725
5726 // It's a float: cast and extract a vector element.
5727 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5728 VecConstant);
5729 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5730 DAG.getConstant(0, DL, MVT::i32));
5731 }
5732
5733 // Finally, try a VMVN.i32
5734 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5735 false, VMVNModImm);
5736 if (NewVal != SDValue()) {
5737 SDLoc DL(Op);
5738 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5739
5740 if (IsDouble)
5741 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5742
5743 // It's a float: cast and extract a vector element.
5744 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5745 VecConstant);
5746 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5747 DAG.getConstant(0, DL, MVT::i32));
5748 }
5749
5750 return SDValue();
5751}
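// Note on the constant 0.0 mentioned above: it has no VMOV.f32/VMOV.f64
// immediate encoding (getFP32Imm/getFP64Imm return -1 for it), but its
// all-zero bit pattern is a valid 32-bit modified immediate, so with NEON
// available a double 0.0 is materialized through the VMOVIMM path rather
// than a constant-pool load.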
5752
5753 // Check if a VEXT instruction can handle the shuffle mask when the
5754// vector sources of the shuffle are the same.
5755static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5756 unsigned NumElts = VT.getVectorNumElements();
5757
5758 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5759 if (M[0] < 0)
5760 return false;
5761
5762 Imm = M[0];
5763
5764 // If this is a VEXT shuffle, the immediate value is the index of the first
5765 // element. The other shuffle indices must be the successive elements after
5766 // the first one.
5767 unsigned ExpectedElt = Imm;
5768 for (unsigned i = 1; i < NumElts; ++i) {
5769 // Increment the expected index. If it wraps around, just follow it
5770 // back to index zero and keep going.
5771 ++ExpectedElt;
5772 if (ExpectedElt == NumElts)
5773 ExpectedElt = 0;
5774
5775 if (M[i] < 0) continue; // ignore UNDEF indices
5776 if (ExpectedElt != static_cast<unsigned>(M[i]))
5777 return false;
5778 }
5779
5780 return true;
5781}
5782
5783static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5784 bool &ReverseVEXT, unsigned &Imm) {
5785 unsigned NumElts = VT.getVectorNumElements();
5786 ReverseVEXT = false;
5787
5788 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5789 if (M[0] < 0)
5790 return false;
5791
5792 Imm = M[0];
5793
5794 // If this is a VEXT shuffle, the immediate value is the index of the first
5795 // element. The other shuffle indices must be the successive elements after
5796 // the first one.
5797 unsigned ExpectedElt = Imm;
5798 for (unsigned i = 1; i < NumElts; ++i) {
5799 // Increment the expected index. If it wraps around, it may still be
5800 // a VEXT but the source vectors must be swapped.
5801 ExpectedElt += 1;
5802 if (ExpectedElt == NumElts * 2) {
5803 ExpectedElt = 0;
5804 ReverseVEXT = true;
5805 }
5806
5807 if (M[i] < 0) continue; // ignore UNDEF indices
5808 if (ExpectedElt != static_cast<unsigned>(M[i]))
5809 return false;
5810 }
5811
5812 // Adjust the index value if the source operands will be swapped.
5813 if (ReverseVEXT)
5814 Imm -= NumElts;
5815
5816 return true;
5817}
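// Example for isVEXTMask above with v8i8 operands: the mask
// <7, 8, 9, 10, 11, 12, 13, 14> yields Imm = 7 with ReverseVEXT = false,
// i.e. "vext.8 d, dA, dB, #7"; the mask <15, 0, 1, 2, 3, 4, 5, 6> wraps
// past 2 * NumElts, so ReverseVEXT becomes true and Imm is adjusted to
// 15 - 8 = 7 with the two source vectors swapped.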
5818
5819/// isVREVMask - Check if a vector shuffle corresponds to a VREV
5820/// instruction with the specified blocksize. (The order of the elements
5821/// within each block of the vector is reversed.)
5822static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5823  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5824         "Only possible block sizes for VREV are: 16, 32, 64");
5825
5826 unsigned EltSz = VT.getScalarSizeInBits();
5827 if (EltSz == 64)
5828 return false;
5829
5830 unsigned NumElts = VT.getVectorNumElements();
5831 unsigned BlockElts = M[0] + 1;
5832 // If the first shuffle index is UNDEF, be optimistic.
5833 if (M[0] < 0)
5834 BlockElts = BlockSize / EltSz;
5835
5836 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5837 return false;
5838
5839 for (unsigned i = 0; i < NumElts; ++i) {
5840 if (M[i] < 0) continue; // ignore UNDEF indices
5841 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5842 return false;
5843 }
5844
5845 return true;
5846}
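// Example for isVREVMask above: with VT = v8i8 and BlockSize = 32, the mask
// <3, 2, 1, 0, 7, 6, 5, 4> is accepted (BlockElts = 4, and every element is
// reversed within its 4-byte block), corresponding to VREV32.8.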
5847
5848static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5849 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5850 // range, then 0 is placed into the resulting vector. So pretty much any mask
5851 // of 8 elements can work here.
5852 return VT == MVT::v8i8 && M.size() == 8;
5853}
5854
5855static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
5856 unsigned Index) {
5857 if (Mask.size() == Elements * 2)
5858 return Index / Elements;
5859 return Mask[Index] == 0 ? 0 : 1;
5860}
5861
5862// Checks whether the shuffle mask represents a vector transpose (VTRN) by
5863// checking that pairs of elements in the shuffle mask represent the same index
5864// in each vector, incrementing the expected index by 2 at each step.
5865// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
5866// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
5867// v2={e,f,g,h}
5868// WhichResult gives the offset for each element in the mask based on which
5869// of the two results it belongs to.
5870//
5871// The transpose can be represented either as:
5872// result1 = shufflevector v1, v2, result1_shuffle_mask
5873// result2 = shufflevector v1, v2, result2_shuffle_mask
5874// where v1/v2 and the shuffle masks have the same number of elements
5875// (here WhichResult (see below) indicates which result is being checked)
5876//
5877// or as:
5878// results = shufflevector v1, v2, shuffle_mask
5879// where both results are returned in one vector and the shuffle mask has twice
5880// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
5881// want to check the low half and high half of the shuffle mask as if it were
5882// the other case
5883static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5884 unsigned EltSz = VT.getScalarSizeInBits();
5885 if (EltSz == 64)
5886 return false;
5887
5888 unsigned NumElts = VT.getVectorNumElements();
5889 if (M.size() != NumElts && M.size() != NumElts*2)
5890 return false;
5891
5892 // If the mask is twice as long as the input vector then we need to check the
5893 // upper and lower parts of the mask with a matching value for WhichResult
5894 // FIXME: A mask with only even values will be rejected in case the first
5895 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
5896 // M[0] is used to determine WhichResult
5897 for (unsigned i = 0; i < M.size(); i += NumElts) {
5898 WhichResult = SelectPairHalf(NumElts, M, i);
5899 for (unsigned j = 0; j < NumElts; j += 2) {
5900 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5901 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
5902 return false;
5903 }
5904 }
5905
5906 if (M.size() == NumElts*2)
5907 WhichResult = 0;
5908
5909 return true;
5910}
5911
5912/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5913/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5914/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5915static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5916 unsigned EltSz = VT.getScalarSizeInBits();
5917 if (EltSz == 64)
5918 return false;
5919
5920 unsigned NumElts = VT.getVectorNumElements();
5921 if (M.size() != NumElts && M.size() != NumElts*2)
5922 return false;
5923
5924 for (unsigned i = 0; i < M.size(); i += NumElts) {
5925 WhichResult = SelectPairHalf(NumElts, M, i);
5926 for (unsigned j = 0; j < NumElts; j += 2) {
5927 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5928 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
5929 return false;
5930 }
5931 }
5932
5933 if (M.size() == NumElts*2)
5934 WhichResult = 0;
5935
5936 return true;
5937}
5938
5939// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
5940// that the mask elements are either all even and in steps of size 2 or all odd
5941// and in steps of size 2.
5942// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
5943// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
5944// v2={e,f,g,h}
5945 // Requires checks similar to those of isVTRNMask with
5946 // respect to how the results are returned.
5947static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5948 unsigned EltSz = VT.getScalarSizeInBits();
5949 if (EltSz == 64)
5950 return false;
5951
5952 unsigned NumElts = VT.getVectorNumElements();
5953 if (M.size() != NumElts && M.size() != NumElts*2)
5954 return false;
5955
5956 for (unsigned i = 0; i < M.size(); i += NumElts) {
5957 WhichResult = SelectPairHalf(NumElts, M, i);
5958 for (unsigned j = 0; j < NumElts; ++j) {
5959 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
5960 return false;
5961 }
5962 }
5963
5964 if (M.size() == NumElts*2)
5965 WhichResult = 0;
5966
5967 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5968 if (VT.is64BitVector() && EltSz == 32)
5969 return false;
5970
5971 return true;
5972}
5973
5974/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5975/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5976 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5977static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5978 unsigned EltSz = VT.getScalarSizeInBits();
5979 if (EltSz == 64)
5980 return false;
5981
5982 unsigned NumElts = VT.getVectorNumElements();
5983 if (M.size() != NumElts && M.size() != NumElts*2)
5984 return false;
5985
5986 unsigned Half = NumElts / 2;
5987 for (unsigned i = 0; i < M.size(); i += NumElts) {
5988 WhichResult = SelectPairHalf(NumElts, M, i);
5989 for (unsigned j = 0; j < NumElts; j += Half) {
5990 unsigned Idx = WhichResult;
5991 for (unsigned k = 0; k < Half; ++k) {
5992 int MIdx = M[i + j + k];
5993 if (MIdx >= 0 && (unsigned) MIdx != Idx)
5994 return false;
5995 Idx += 2;
5996 }
5997 }
5998 }
5999
6000 if (M.size() == NumElts*2)
6001 WhichResult = 0;
6002
6003 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6004 if (VT.is64BitVector() && EltSz == 32)
6005 return false;
6006
6007 return true;
6008}
6009
6010// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
6011// that pairs of elements of the shufflemask represent the same index in each
6012// vector incrementing sequentially through the vectors.
6013// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
6014// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
6015// v2={e,f,g,h}
6016 // Requires checks similar to those of isVTRNMask with respect to how the
6017 // results are returned.
6018static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
6019 unsigned EltSz = VT.getScalarSizeInBits();
6020 if (EltSz == 64)
6021 return false;
6022
6023 unsigned NumElts = VT.getVectorNumElements();
6024 if (M.size() != NumElts && M.size() != NumElts*2)
6025 return false;
6026
6027 for (unsigned i = 0; i < M.size(); i += NumElts) {
6028 WhichResult = SelectPairHalf(NumElts, M, i);
6029 unsigned Idx = WhichResult * NumElts / 2;
6030 for (unsigned j = 0; j < NumElts; j += 2) {
6031 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6032 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
6033 return false;
6034 Idx += 1;
6035 }
6036 }
6037
6038 if (M.size() == NumElts*2)
6039 WhichResult = 0;
6040
6041 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6042 if (VT.is64BitVector() && EltSz == 32)
6043 return false;
6044
6045 return true;
6046}
6047
6048/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
6049/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
6050/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
6051static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
6052 unsigned EltSz = VT.getScalarSizeInBits();
6053 if (EltSz == 64)
6054 return false;
6055
6056 unsigned NumElts = VT.getVectorNumElements();
6057 if (M.size() != NumElts && M.size() != NumElts*2)
6058 return false;
6059
6060 for (unsigned i = 0; i < M.size(); i += NumElts) {
6061 WhichResult = SelectPairHalf(NumElts, M, i);
6062 unsigned Idx = WhichResult * NumElts / 2;
6063 for (unsigned j = 0; j < NumElts; j += 2) {
6064 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
6065 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
6066 return false;
6067 Idx += 1;
6068 }
6069 }
6070
6071 if (M.size() == NumElts*2)
6072 WhichResult = 0;
6073
6074 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6075 if (VT.is64BitVector() && EltSz == 32)
6076 return false;
6077
6078 return true;
6079}
6080
6081/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
6082/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
6083static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
6084 unsigned &WhichResult,
6085 bool &isV_UNDEF) {
6086 isV_UNDEF = false;
6087 if (isVTRNMask(ShuffleMask, VT, WhichResult))
6088 return ARMISD::VTRN;
6089 if (isVUZPMask(ShuffleMask, VT, WhichResult))
6090 return ARMISD::VUZP;
6091 if (isVZIPMask(ShuffleMask, VT, WhichResult))
6092 return ARMISD::VZIP;
6093
6094 isV_UNDEF = true;
6095 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6096 return ARMISD::VTRN;
6097 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6098 return ARMISD::VUZP;
6099 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6100 return ARMISD::VZIP;
6101
6102 return 0;
6103}
6104
6105 /// \return true if this is a reverse operation on a vector.
6106static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6107 unsigned NumElts = VT.getVectorNumElements();
6108 // Make sure the mask has the right size.
6109 if (NumElts != M.size())
6110 return false;
6111
6112 // Look for <15, ..., 3, -1, 1, 0>.
6113 for (unsigned i = 0; i != NumElts; ++i)
6114 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6115 return false;
6116
6117 return true;
6118}
6119
6120// If N is an integer constant that can be moved into a register in one
6121// instruction, return an SDValue of such a constant (will become a MOV
6122// instruction). Otherwise return null.
6123static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6124 const ARMSubtarget *ST, const SDLoc &dl) {
6125 uint64_t Val;
6126 if (!isa<ConstantSDNode>(N))
6127 return SDValue();
6128 Val = cast<ConstantSDNode>(N)->getZExtValue();
6129
6130 if (ST->isThumb1Only()) {
6131 if (Val <= 255 || ~Val <= 255)
6132 return DAG.getConstant(Val, dl, MVT::i32);
6133 } else {
6134 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6135 return DAG.getConstant(Val, dl, MVT::i32);
6136 }
6137 return SDValue();
6138}
6139
6140// If this is a case we can't handle, return null and let the default
6141// expansion code take care of it.
6142SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6143 const ARMSubtarget *ST) const {
6144 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6145 SDLoc dl(Op);
6146 EVT VT = Op.getValueType();
6147
6148 APInt SplatBits, SplatUndef;
6149 unsigned SplatBitSize;
6150 bool HasAnyUndefs;
6151 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6152 if (SplatUndef.isAllOnesValue())
6153 return DAG.getUNDEF(VT);
6154
6155 if (SplatBitSize <= 64) {
6156 // Check if an immediate VMOV works.
6157 EVT VmovVT;
6158 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6159 SplatUndef.getZExtValue(), SplatBitSize,
6160 DAG, dl, VmovVT, VT.is128BitVector(),
6161 VMOVModImm);
6162 if (Val.getNode()) {
6163 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6164 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6165 }
6166
6167 // Try an immediate VMVN.
6168 uint64_t NegatedImm = (~SplatBits).getZExtValue();
6169 Val = isNEONModifiedImm(NegatedImm,
6170 SplatUndef.getZExtValue(), SplatBitSize,
6171 DAG, dl, VmovVT, VT.is128BitVector(),
6172 VMVNModImm);
6173 if (Val.getNode()) {
6174 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6175 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6176 }
6177
6178 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6179 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6180 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6181 if (ImmVal != -1) {
6182 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6183 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6184 }
6185 }
6186 }
6187 }
6188
6189 // Scan through the operands to see if only one value is used.
6190 //
6191 // As an optimisation, even if more than one value is used it may be more
6192 // profitable to splat with one value then change some lanes.
6193 //
6194 // Heuristically we decide to do this if the vector has a "dominant" value,
6195 // defined as splatted to more than half of the lanes.
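  // For example, in a non-constant <a, a, a, b> the value 'a' fills three of
  // the four lanes and is therefore dominant: the code below VDUPs 'a' and
  // then patches the remaining lane with an INSERT_VECTOR_ELT of 'b'.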
6196 unsigned NumElts = VT.getVectorNumElements();
6197 bool isOnlyLowElement = true;
6198 bool usesOnlyOneValue = true;
6199 bool hasDominantValue = false;
6200 bool isConstant = true;
6201
6202 // Map of the number of times a particular SDValue appears in the
6203 // element list.
6204 DenseMap<SDValue, unsigned> ValueCounts;
6205 SDValue Value;
6206 for (unsigned i = 0; i < NumElts; ++i) {
6207 SDValue V = Op.getOperand(i);
6208 if (V.isUndef())
6209 continue;
6210 if (i > 0)
6211 isOnlyLowElement = false;
6212 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6213 isConstant = false;
6214
6215 ValueCounts.insert(std::make_pair(V, 0));
6216 unsigned &Count = ValueCounts[V];
6217
6218 // Is this value dominant? (takes up more than half of the lanes)
6219 if (++Count > (NumElts / 2)) {
6220 hasDominantValue = true;
6221 Value = V;
6222 }
6223 }
6224 if (ValueCounts.size() != 1)
6225 usesOnlyOneValue = false;
6226 if (!Value.getNode() && !ValueCounts.empty())
6227 Value = ValueCounts.begin()->first;
6228
6229 if (ValueCounts.empty())
6230 return DAG.getUNDEF(VT);
6231
6232 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6233 // Keep going if we are hitting this case.
6234 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6235 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6236
6237 unsigned EltSize = VT.getScalarSizeInBits();
6238
6239 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6240 // i32 and try again.
6241 if (hasDominantValue && EltSize <= 32) {
6242 if (!isConstant) {
6243 SDValue N;
6244
6245 // If we are VDUPing a value that comes directly from a vector, that will
6246 // cause an unnecessary move to and from a GPR, where instead we could
6247 // just use VDUPLANE. We can only do this if the lane being extracted
6248 // is at a constant index, as the VDUP from lane instructions only have
6249 // constant-index forms.
6250 ConstantSDNode *constIndex;
6251 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6252 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6253 // We need to create a new undef vector to use for the VDUPLANE if the
6254 // size of the vector from which we get the value is different than the
6255 // size of the vector that we need to create. We will insert the element
6256 // such that the register coalescer will remove unnecessary copies.
6257 if (VT != Value->getOperand(0).getValueType()) {
6258 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6259 VT.getVectorNumElements();
6260 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6261 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6262 Value, DAG.getConstant(index, dl, MVT::i32)),
6263 DAG.getConstant(index, dl, MVT::i32));
6264 } else
6265 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6266 Value->getOperand(0), Value->getOperand(1));
6267 } else
6268 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6269
6270 if (!usesOnlyOneValue) {
6271 // The dominant value was splatted as 'N', but we now have to insert
6272 // all differing elements.
6273 for (unsigned I = 0; I < NumElts; ++I) {
6274 if (Op.getOperand(I) == Value)
6275 continue;
6276 SmallVector<SDValue, 3> Ops;
6277 Ops.push_back(N);
6278 Ops.push_back(Op.getOperand(I));
6279 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6280 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6281 }
6282 }
6283 return N;
6284 }
6285 if (VT.getVectorElementType().isFloatingPoint()) {
6286 SmallVector<SDValue, 8> Ops;
6287 for (unsigned i = 0; i < NumElts; ++i)
6288 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6289 Op.getOperand(i)));
6290 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6291 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6292 Val = LowerBUILD_VECTOR(Val, DAG, ST);
6293 if (Val.getNode())
6294 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6295 }
6296 if (usesOnlyOneValue) {
6297 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6298 if (isConstant && Val.getNode())
6299 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6300 }
6301 }
6302
6303 // If all elements are constants and the case above didn't get hit, fall back
6304 // to the default expansion, which will generate a load from the constant
6305 // pool.
6306 if (isConstant)
6307 return SDValue();
6308
6309 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6310 if (NumElts >= 4) {
6311 SDValue shuffle = ReconstructShuffle(Op, DAG);
6312 if (shuffle != SDValue())
6313 return shuffle;
6314 }
6315
6316 if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6317 // If we haven't found an efficient lowering, try splitting a 128-bit vector
6318 // into two 64-bit vectors; we might discover a better way to lower it.
6319 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6320 EVT ExtVT = VT.getVectorElementType();
6321 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6322 SDValue Lower =
6323 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6324 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6325 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6326 SDValue Upper = DAG.getBuildVector(
6327 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6328 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6329 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6330 if (Lower && Upper)
6331 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6332 }
6333
6334 // Vectors with 32- or 64-bit elements can be built by directly assigning
6335 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6336 // will be legalized.
6337 if (EltSize >= 32) {
6338 // Do the expansion with floating-point types, since that is what the VFP
6339 // registers are defined to use, and since i64 is not legal.
6340 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6341 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6342 SmallVector<SDValue, 8> Ops;
6343 for (unsigned i = 0; i < NumElts; ++i)
6344 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6345 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6346 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6347 }
6348
6349 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6350 // know the default expansion would otherwise fall back on something even
6351 // worse. For a vector with one or two non-undef values, the default is
6352 // scalar_to_vector for the elements followed by a shuffle (provided the
6353 // shuffle is valid for the target); for everything else, it is
6354 // materialization element by element on the stack followed by a load.
6355 if (!isConstant && !usesOnlyOneValue) {
6356 SDValue Vec = DAG.getUNDEF(VT);
6357 for (unsigned i = 0 ; i < NumElts; ++i) {
6358 SDValue V = Op.getOperand(i);
6359 if (V.isUndef())
6360 continue;
6361 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6362 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6363 }
6364 return Vec;
6365 }
6366
6367 return SDValue();
6368}
6369
6370// Gather data to see if the operation can be modelled as a
6371// shuffle in combination with VEXTs.
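// For example (with illustrative operands %a and %b), the node
//   (BUILD_VECTOR (extract_vector_elt %a, 2), (extract_vector_elt %a, 3),
//                 (extract_vector_elt %b, 0), (extract_vector_elt %b, 1))
// can be rebuilt as a vector_shuffle of %a and %b with mask <2, 3, 4, 5>,
// which the shuffle lowering can then match to a VEXT.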
6372SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6373 SelectionDAG &DAG) const {
6374 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6375 SDLoc dl(Op);
6376 EVT VT = Op.getValueType();
6377 unsigned NumElts = VT.getVectorNumElements();
6378
6379 struct ShuffleSourceInfo {
6380 SDValue Vec;
6381 unsigned MinElt = std::numeric_limits<unsigned>::max();
6382 unsigned MaxElt = 0;
6383
6384 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6385 // be compatible with the shuffle we intend to construct. As a result
6386 // ShuffleVec will be some sliding window into the original Vec.
6387 SDValue ShuffleVec;
6388
6389 // Code should guarantee that element i in Vec starts at element
6390 // "WindowBase + i * WindowScale" in ShuffleVec.
6391 int WindowBase = 0;
6392 int WindowScale = 1;
6393
6394 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6395
6396 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6397 };
6398
6399 // First gather all vectors used as an immediate source for this BUILD_VECTOR
6400 // node.
6401 SmallVector<ShuffleSourceInfo, 2> Sources;
6402 for (unsigned i = 0; i < NumElts; ++i) {
6403 SDValue V = Op.getOperand(i);
6404 if (V.isUndef())
6405 continue;
6406 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6407 // A shuffle can only come from building a vector from various
6408 // elements of other vectors.
6409 return SDValue();
6410 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6411 // Furthermore, shuffles require a constant mask, whereas extractelts
6412 // accept variable indices.
6413 return SDValue();
6414 }
6415
6416 // Add this element source to the list if it's not already there.
6417 SDValue SourceVec = V.getOperand(0);
6418 auto Source = llvm::find(Sources, SourceVec);
6419 if (Source == Sources.end())
6420 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6421
6422 // Update the minimum and maximum lane number seen.
6423 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6424 Source->MinElt = std::min(Source->MinElt, EltNo);
6425 Source->MaxElt = std::max(Source->MaxElt, EltNo);
6426 }
6427
6428 // Currently only do something sane when at most two source vectors
6429 // are involved.
6430 if (Sources.size() > 2)
6431 return SDValue();
6432
6433 // Find the smallest element size among the result and the two sources, and
6434 // use it as the element size to build the shuffle_vector.
6435 EVT SmallestEltTy = VT.getVectorElementType();
6436 for (auto &Source : Sources) {
6437 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6438 if (SrcEltTy.bitsLT(SmallestEltTy))
6439 SmallestEltTy = SrcEltTy;
6440 }
6441 unsigned ResMultiplier =
6442 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6443 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6444 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6445
6446 // If the source vector is too wide or too narrow, we may nevertheless be able
6447 // to construct a compatible shuffle either by concatenating it with UNDEF or
6448 // extracting a suitable range of elements.
6449 for (auto &Src : Sources) {
6450 EVT SrcVT = Src.ShuffleVec.getValueType();
6451
6452 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6453 continue;
6454
6455 // This stage of the search produces a source with the same element type as
6456 // the original, but with a total width matching the BUILD_VECTOR output.
6457 EVT EltVT = SrcVT.getVectorElementType();
6458 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6459 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6460
6461 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6462 if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6463 return SDValue();
6464 // We can pad out the smaller vector for free, so if it's part of a
6465 // shuffle, concatenate it with UNDEF to double it to the output width.
6466 Src.ShuffleVec =
6467 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6468 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6469 continue;
6470 }
6471
6472 if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6473 return SDValue();
6474
6475 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6476 // Span too large for a VEXT to cope
6477 return SDValue();
6478 }
6479
6480 if (Src.MinElt >= NumSrcElts) {
6481 // The extraction can just take the second half
6482 Src.ShuffleVec =
6483 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6484 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6485 Src.WindowBase = -NumSrcElts;
6486 } else if (Src.MaxElt < NumSrcElts) {
6487 // The extraction can just take the first half
6488 Src.ShuffleVec =
6489 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6490 DAG.getConstant(0, dl, MVT::i32));
6491 } else {
6492 // An actual VEXT is needed
6493 SDValue VEXTSrc1 =
6494 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6495 DAG.getConstant(0, dl, MVT::i32));
6496 SDValue VEXTSrc2 =
6497 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6498 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6499
6500 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6501 VEXTSrc2,
6502 DAG.getConstant(Src.MinElt, dl, MVT::i32));
6503 Src.WindowBase = -Src.MinElt;
6504 }
6505 }
6506
6507 // Another possible incompatibility occurs from the vector element types. We
6508 // can fix this by bitcasting the source vectors to the same type we intend
6509 // for the shuffle.
6510 for (auto &Src : Sources) {
6511 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6512 if (SrcEltTy == SmallestEltTy)
6513 continue;
6514 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6515 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6516 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6517 Src.WindowBase *= Src.WindowScale;
6518 }
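// For example, if Vec is a v4i32 source and SmallestEltTy is i16, the
// bitcast gives a v8i16 ShuffleVec with WindowScale 2, so element i of Vec
// now starts at shuffle lane WindowBase + 2 * i (with WindowBase rescaled
// to the new lane size).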
6519
6520 // Final sanity check before we try to actually produce a shuffle.
6521 DEBUG(
6522 for (auto Src : Sources)
6523 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
6524 );
6525
6526 // The stars all align; our next step is to produce the mask for the shuffle.
6527 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6528 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6529 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6530 SDValue Entry = Op.getOperand(i);
6531 if (Entry.isUndef())
6532 continue;
6533
6534 auto Src = llvm::find(Sources, Entry.getOperand(0));
6535 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6536
6537 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6538 // trunc. So only std::min(SrcBits, DestBits) bits actually get defined in
6539 // this segment.
6540 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6541 int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6542 VT.getScalarSizeInBits());
6543 int LanesDefined = BitsDefined / BitsPerShuffleLane;
6544
6545 // This source is expected to fill ResMultiplier lanes of the final shuffle,
6546 // starting at the appropriate offset.
6547 int *LaneMask = &Mask[i * ResMultiplier];
6548
6549 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6550 ExtractBase += NumElts * (Src - Sources.begin());
6551 for (int j = 0; j < LanesDefined; ++j)
6552 LaneMask[j] = ExtractBase + j;
6553 }
6554
6555 // Final check before we try to produce nonsense...
6556 if (!isShuffleMaskLegal(Mask, ShuffleVT))
6557 return SDValue();
6558
6559 // We can't handle more than two sources. This should have already
6560 // been checked before this point.
6561 assert(Sources.size() <= 2 && "Too many sources!");
6562
6563 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6564 for (unsigned i = 0; i < Sources.size(); ++i)
6565 ShuffleOps[i] = Sources[i].ShuffleVec;
6566
6567 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6568 ShuffleOps[1], Mask);
6569 return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6570}
6571
6572/// isShuffleMaskLegal - Targets can use this to indicate that they only
6573/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6574/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6575/// are assumed to be legal.
6576bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6577 if (VT.getVectorNumElements() == 4 &&
6578 (VT.is128BitVector() || VT.is64BitVector())) {
6579 unsigned PFIndexes[4];
6580 for (unsigned i = 0; i != 4; ++i) {
6581 if (M[i] < 0)
6582 PFIndexes[i] = 8;
6583 else
6584 PFIndexes[i] = M[i];
6585 }
6586
6587 // Compute the index in the perfect shuffle table.
6588 unsigned PFTableIndex =
6589 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
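// For example, the mask <1, 1, 2, 3> gives PFIndexes = {1, 1, 2, 3} and
// PFTableIndex = 1*729 + 1*81 + 2*9 + 3 = 831; an undef lane contributes 8.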
6590 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6591 unsigned Cost = (PFEntry >> 30);
6592
6593 if (Cost <= 4)
6594 return true;
6595 }
6596
6597 bool ReverseVEXT, isV_UNDEF;
6598 unsigned Imm, WhichResult;
6599
6600 unsigned EltSize = VT.getScalarSizeInBits();
6601 return (EltSize >= 32 ||
6602 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6603 isVREVMask(M, VT, 64) ||
6604 isVREVMask(M, VT, 32) ||
6605 isVREVMask(M, VT, 16) ||
6606 isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6607 isVTBLMask(M, VT) ||
6608 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6609 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6610}
6611
6612/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6613/// the specified operations to build the shuffle.
6614static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6615 SDValue RHS, SelectionDAG &DAG,
6616 const SDLoc &dl) {
6617 unsigned OpNum = (PFEntry >> 26) & 0x0F;
6618 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6619 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
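// The entry is laid out as: bits [31:30] cost, [29:26] opcode, [25:13] the
// base-9-encoded mask for the LHS sub-shuffle, [12:0] the same for the RHS.
// In the OP_COPY case below, LHSID == (1*9+2)*9+3 encodes the identity mask
// <0,1,2,3> (just return LHS) and ((4*9+5)*9+6)*9+7 encodes <4,5,6,7>
// (just return RHS).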
6620
6621 enum {
6622 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6623 OP_VREV,
6624 OP_VDUP0,
6625 OP_VDUP1,
6626 OP_VDUP2,
6627 OP_VDUP3,
6628 OP_VEXT1,
6629 OP_VEXT2,
6630 OP_VEXT3,
6631 OP_VUZPL, // VUZP, left result
6632 OP_VUZPR, // VUZP, right result
6633 OP_VZIPL, // VZIP, left result
6634 OP_VZIPR, // VZIP, right result
6635 OP_VTRNL, // VTRN, left result
6636 OP_VTRNR // VTRN, right result
6637 };
6638
6639 if (OpNum == OP_COPY) {
6640 if (LHSID == (1*9+2)*9+3) return LHS;
6641 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6642 return RHS;
6643 }
6644
6645 SDValue OpLHS, OpRHS;
6646 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6647 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6648 EVT VT = OpLHS.getValueType();
6649
6650 switch (OpNum) {
6651 default: llvm_unreachable("Unknown shuffle opcode!");
6652 case OP_VREV:
6653 // VREV divides the vector in half and swaps within the half.
6654 if (VT.getVectorElementType() == MVT::i32 ||
6655 VT.getVectorElementType() == MVT::f32)
6656 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6657 // vrev <4 x i16> -> VREV32
6658 if (VT.getVectorElementType() == MVT::i16)
6659 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6660 // vrev <4 x i8> -> VREV16
6661 assert(VT.getVectorElementType() == MVT::i8);
6662 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6663 case OP_VDUP0:
6664 case OP_VDUP1:
6665 case OP_VDUP2:
6666 case OP_VDUP3:
6667 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6668 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6669 case OP_VEXT1:
6670 case OP_VEXT2:
6671 case OP_VEXT3:
6672 return DAG.getNode(ARMISD::VEXT, dl, VT,
6673 OpLHS, OpRHS,
6674 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6675 case OP_VUZPL:
6676 case OP_VUZPR:
6677 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6678 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6679 case OP_VZIPL:
6680 case OP_VZIPR:
6681 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6682 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6683 case OP_VTRNL:
6684 case OP_VTRNR:
6685 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6686 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6687 }
6688}
6689
6690static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6691 ArrayRef<int> ShuffleMask,
6692 SelectionDAG &DAG) {
6693 // Check to see if we can use the VTBL instruction.
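// Per the NEON VTBL semantics, the first operand(s) form a byte table and
// the last operand holds byte indices: result byte i is Table[Mask[i]], with
// out-of-range indices producing 0. A v8i8 shuffle mask can therefore be
// used directly as the index vector.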
6694 SDValue V1 = Op.getOperand(0);
6695 SDValue V2 = Op.getOperand(1);
6696 SDLoc DL(Op);
6697
6698 SmallVector<SDValue, 8> VTBLMask;
6699 for (ArrayRef<int>::iterator
6700 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6701 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6702
6703 if (V2.getNode()->isUndef())
6704 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6705 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6706
6707 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6708 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6709}
6710
6711static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6712 SelectionDAG &DAG) {
6713 SDLoc DL(Op);
6714 SDValue OpLHS = Op.getOperand(0);
6715 EVT VT = OpLHS.getValueType();
6716
6717 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6718 "Expect an v8i16/v16i8 type");
6719 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6720 // For a v16i8 type: after the VREV we have <7, ..., 0, 15, ..., 8>. Now,
6721 // extract the first 8 bytes into the top double word and the last 8 bytes
6722 // into the bottom double word. The v8i16 case is similar.
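// Concretely, for v8i16 with lanes <0,1,2,3,4,5,6,7>: VREV64 yields
// <3,2,1,0,7,6,5,4>, and the VEXT with #4 then rotates the two halves to
// give the fully reversed <7,6,5,4,3,2,1,0>.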
6723 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6724 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6725 DAG.getConstant(ExtractNum, DL, MVT::i32));
6726}
6727
6728static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6729 SDValue V1 = Op.getOperand(0);
6730 SDValue V2 = Op.getOperand(1);
6731 SDLoc dl(Op);
6732 EVT VT = Op.getValueType();
6733 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6734
6735 // Convert shuffles that are directly supported on NEON to target-specific
6736 // DAG nodes, instead of keeping them as shuffles and matching them again
6737 // during code selection. This is more efficient and avoids the possibility
6738 // of inconsistencies between legalization and selection.
6739 // FIXME: floating-point vectors should be canonicalized to integer vectors
6740 // of the same type so that they get CSEd properly.
6741 ArrayRef<int> ShuffleMask = SVN->getMask();
6742
6743 unsigned EltSize = VT.getScalarSizeInBits();
6744 if (EltSize <= 32) {
6745 if (SVN->isSplat()) {
6746 int Lane = SVN->getSplatIndex();
6747 // If this is an undef splat, generate it via a plain VDUP, if possible.
6748 if (Lane == -1) Lane = 0;
6749
6750 // Test if V1 is a SCALAR_TO_VECTOR.
6751 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6752 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6753 }
6754 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6755 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6756 // reaches it).
6757 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6758 !isa<ConstantSDNode>(V1.getOperand(0))) {
6759 bool IsScalarToVector = true;
6760 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6761 if (!V1.getOperand(i).isUndef()) {
6762 IsScalarToVector = false;
6763 break;
6764 }
6765 if (IsScalarToVector)
6766 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6767 }
6768 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6769 DAG.getConstant(Lane, dl, MVT::i32));
6770 }
6771
6772 bool ReverseVEXT;
6773 unsigned Imm;
6774 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
6775 if (ReverseVEXT)
6776 std::swap(V1, V2);
6777 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6778 DAG.getConstant(Imm, dl, MVT::i32));
6779 }
6780
6781 if (isVREVMask(ShuffleMask, VT, 64))
6782 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6783 if (isVREVMask(ShuffleMask, VT, 32))
6784 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6785 if (isVREVMask(ShuffleMask, VT, 16))
6786 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6787
6788 if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6789 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6790 DAG.getConstant(Imm, dl, MVT::i32));
6791 }
6792
6793 // Check for Neon shuffles that modify both input vectors in place.
6794 // If both results are used, i.e., if there are two shuffles with the same
6795 // source operands and with masks corresponding to both results of one of
6796 // these operations, DAG memoization will ensure that a single node is
6797 // used for both shuffles.
6798 unsigned WhichResult;
6799 bool isV_UNDEF;
6800 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6801 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
6802 if (isV_UNDEF)
6803 V2 = V1;
6804 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
6805 .getValue(WhichResult);
6806 }
6807
6808 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
6809 // shuffles that produce a result larger than their operands with:
6810 // shuffle(concat(v1, undef), concat(v2, undef))
6811 // ->
6812 // shuffle(concat(v1, v2), undef)
6813 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
6814 //
6815 // This is useful in the general case, but there are special cases where
6816 // native shuffles produce larger results: the two-result ops.
6817 //
6818 // Look through the concat when lowering them:
6819 // shuffle(concat(v1, v2), undef)
6820 // ->
6821 // concat(VZIP(v1, v2):0, :1)
6822 //
6823 if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
6824 SDValue SubV1 = V1->getOperand(0);
6825 SDValue SubV2 = V1->getOperand(1);
6826 EVT SubVT = SubV1.getValueType();
6827
6828 // We expect these to have been canonicalized to -1.
6829 assert(llvm::all_of(ShuffleMask, [&](int i) {
6830 return i < (int)VT.getVectorNumElements();
6831 }) && "Unexpected shuffle index into UNDEF operand!");
6832
6833 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6834 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
6835 if (isV_UNDEF)
6836 SubV2 = SubV1;
6837 assert((WhichResult == 0) &&
6838 "In-place shuffle of concat can only have one result!");
6839 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
6840 SubV1, SubV2);
6841 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
6842 Res.getValue(1));
6843 }
6844 }
6845 }
6846
6847 // If the shuffle is not directly supported and it has 4 elements, use
6848 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6849 unsigned NumElts = VT.getVectorNumElements();
6850 if (NumElts == 4) {
6851 unsigned PFIndexes[4];
6852 for (unsigned i = 0; i != 4; ++i) {
6853 if (ShuffleMask[i] < 0)
6854 PFIndexes[i] = 8;
6855 else
6856 PFIndexes[i] = ShuffleMask[i];
6857 }
6858
6859 // Compute the index in the perfect shuffle table.
6860 unsigned PFTableIndex =
6861 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6862 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6863 unsigned Cost = (PFEntry >> 30);
6864
6865 if (Cost <= 4)
6866 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6867 }
6868
6869 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
6870 if (EltSize >= 32) {
6871 // Do the expansion with floating-point types, since that is what the VFP
6872 // registers are defined to use, and since i64 is not legal.
6873 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6874 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6875 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
6876 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
6877 SmallVector<SDValue, 8> Ops;
6878 for (unsigned i = 0; i < NumElts; ++i) {
6879 if (ShuffleMask[i] < 0)
6880 Ops.push_back(DAG.getUNDEF(EltVT));
6881 else
6882 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
6883 ShuffleMask[i] < (int)NumElts ? V1 : V2,
6884 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
6885 dl, MVT::i32)));
6886 }
6887 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6888 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6889 }
6890
6891 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
6892 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
6893
6894 if (VT == MVT::v8i8)
6895 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
6896 return NewOp;
6897
6898 return SDValue();
6899}
6900
6901static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6902 // INSERT_VECTOR_ELT is legal only for immediate indexes.
6903 SDValue Lane = Op.getOperand(2);
6904 if (!isa<ConstantSDNode>(Lane))
6905 return SDValue();
6906
6907 return Op;
6908}
6909
6910static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6911 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
6912 SDValue Lane = Op.getOperand(1);
6913 if (!isa<ConstantSDNode>(Lane))
6914 return SDValue();
6915
6916 SDValue Vec = Op.getOperand(0);
6917 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
6918 SDLoc dl(Op);
6919 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
6920 }
6921
6922 return Op;
6923}
6924
6925static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
6926 // The only time a CONCAT_VECTORS operation can have legal types is when
6927 // two 64-bit vectors are concatenated to a 128-bit vector.
6928 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
6929 "unexpected CONCAT_VECTORS");
6930 SDLoc dl(Op);
6931 SDValue Val = DAG.getUNDEF(MVT::v2f64);
6932 SDValue Op0 = Op.getOperand(0);
6933 SDValue Op1 = Op.getOperand(1);
6934 if (!Op0.isUndef())
6935 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6936 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
6937 DAG.getIntPtrConstant(0, dl));
6938 if (!Op1.isUndef())
6939 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6940 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
6941 DAG.getIntPtrConstant(1, dl));
6942 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
6943}
6944
6945/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
6946/// element has been zero/sign-extended, depending on the isSigned parameter,
6947/// from an integer type half its size.
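/// For example, with isSigned the v4i32 constant <42, -3, 100, -128> counts
/// as sign-extended from i16, since every element fits in 16 signed bits.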
6948static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
6949 bool isSigned) {
6950 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
6951 EVT VT = N->getValueType(0);
6952 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
6953 SDNode *BVN = N->getOperand(0).getNode();
6954 if (BVN->getValueType(0) != MVT::v4i32 ||
6955 BVN->getOpcode() != ISD::BUILD_VECTOR)
6956 return false;
6957 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6958 unsigned HiElt = 1 - LoElt;
6959 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
6960 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
6961 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
6962 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
6963 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
6964 return false;
6965 if (isSigned) {
6966 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
6967 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
6968 return true;
6969 } else {
6970 if (Hi0->isNullValue() && Hi1->isNullValue())
6971 return true;
6972 }
6973 return false;
6974 }
6975
6976 if (N->getOpcode() != ISD::BUILD_VECTOR)
6977 return false;
6978
6979 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
6980 SDNode *Elt = N->getOperand(i).getNode();
6981 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
6982 unsigned EltSize = VT.getScalarSizeInBits();
6983 unsigned HalfSize = EltSize / 2;
6984 if (isSigned) {
6985 if (!isIntN(HalfSize, C->getSExtValue()))
6986 return false;
6987 } else {
6988 if (!isUIntN(HalfSize, C->getZExtValue()))
6989 return false;
6990 }
6991 continue;
6992 }
6993 return false;
6994 }
6995
6996 return true;
6997}
6998
6999/// isSignExtended - Check if a node is a vector value that is sign-extended
7000/// or a constant BUILD_VECTOR with sign-extended elements.
7001static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
7002 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
7003 return true;
7004 if (isExtendedBUILD_VECTOR(N, DAG, true))
7005 return true;
7006 return false;
7007}
7008
7009/// isZeroExtended - Check if a node is a vector value that is zero-extended
7010/// or a constant BUILD_VECTOR with zero-extended elements.
7011static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
7012 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
7013 return true;
7014 if (isExtendedBUILD_VECTOR(N, DAG, false))
7015 return true;
7016 return false;
7017}
7018
7019static EVT getExtensionTo64Bits(const EVT &OrigVT) {
7020 if (OrigVT.getSizeInBits() >= 64)
7021 return OrigVT;
7022
7023 assert(OrigVT.isSimple() && "Expecting a simple value type");
7024
7025 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
7026 switch (OrigSimpleTy) {
7027 default: llvm_unreachable("Unexpected Vector Type");
7028 case MVT::v2i8:
7029 case MVT::v2i16:
7030 return MVT::v2i32;
7031 case MVT::v4i8:
7032 return MVT::v4i16;
7033 }
7034}
7035
7036/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
7037/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
7038/// We insert the required extension here to get the vector to fill a D register.
7039static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
7040 const EVT &OrigTy,
7041 const EVT &ExtTy,
7042 unsigned ExtOpcode) {
7043 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
7044 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
7045 // 64-bits we need to insert a new extension so that it will be 64-bits.
7046 assert(ExtTy.is128BitVector() && "Unexpected extension size");
7047 if (OrigTy.getSizeInBits() >= 64)
7048 return N;
7049
7050 // Must extend size to at least 64 bits to be used as an operand for VMULL.
7051 EVT NewVT = getExtensionTo64Bits(OrigTy);
7052
7053 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
7054}
7055
7056/// SkipLoadExtensionForVMULL - return a load of the original vector size that
7057/// does not do any sign/zero extension. If the original vector is less
7058/// than 64 bits, an appropriate extension will be added after the load to
7059/// reach a total size of 64 bits. We have to add the extension separately
7060/// because ARM does not have a sign/zero extending load for vectors.
7061static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
7062 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
7063
7064 // The load already has the right type.
7065 if (ExtendedTy == LD->getMemoryVT())
7066 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
7067 LD->getBasePtr(), LD->getPointerInfo(),
7068 LD->getAlignment(), LD->getMemOperand()->getFlags());
7069
7070 // We need to create a zextload/sextload. We cannot just create a load
7071 // followed by a sext/zext node because LowerMUL is also run during normal
7072 // operation legalization where we can't create illegal types.
7073 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
7074 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
7075 LD->getMemoryVT(), LD->getAlignment(),
7076 LD->getMemOperand()->getFlags());
7077}
7078
7079/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
7080/// extending load, or BUILD_VECTOR with extended elements, return the
7081/// unextended value. The unextended vector should be 64 bits so that it can
7082/// be used as an operand to a VMULL instruction. If the original vector size
7083 /// before extension is less than 64 bits we add an extension to resize
7084/// the vector to 64 bits.
7085static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
7086 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
7087 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
7088 N->getOperand(0)->getValueType(0),
7089 N->getValueType(0),
7090 N->getOpcode());
7091
7092 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7093 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7094 "Expected extending load");
7095
7096 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7097 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7098 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7099 SDValue extLoad =
7100 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7101 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7102
7103 return newLoad;
7104 }
7105
7106 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7107 // have been legalized as a BITCAST from v4i32.
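// For example, on a little-endian target a v2i64 constant <A, B> appears
// here as (bitcast (build_vector v4i32 <A.lo, A.hi, B.lo, B.hi>)), and the
// unextended value we want is the v2i32 vector <A.lo, B.lo> (A and B are
// illustrative names).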
7108 if (N->getOpcode() == ISD::BITCAST) {
7109 SDNode *BVN = N->getOperand(0).getNode();
7110 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7111 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7112 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7113 return DAG.getBuildVector(
7114 MVT::v2i32, SDLoc(N),
7115 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7116 }
7117 // Construct a new BUILD_VECTOR with elements truncated to half the size.
7118 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7119 EVT VT = N->getValueType(0);
7120 unsigned EltSize = VT.getScalarSizeInBits() / 2;
7121 unsigned NumElts = VT.getVectorNumElements();
7122 MVT TruncVT = MVT::getIntegerVT(EltSize);
7123 SmallVector<SDValue, 8> Ops;
7124 SDLoc dl(N);
7125 for (unsigned i = 0; i != NumElts; ++i) {
7126 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7127 const APInt &CInt = C->getAPIntValue();
7128 // Element types smaller than 32 bits are not legal, so use i32 elements.
7129 // The values are implicitly truncated so sext vs. zext doesn't matter.
7130 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7131 }
7132 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7133}
7134
7135static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7136 unsigned Opcode = N->getOpcode();
7137 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7138 SDNode *N0 = N->getOperand(0).getNode();
7139 SDNode *N1 = N->getOperand(1).getNode();
7140 return N0->hasOneUse() && N1->hasOneUse() &&
7141 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7142 }
7143 return false;
7144}
7145
7146static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7147 unsigned Opcode = N->getOpcode();
7148 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7149 SDNode *N0 = N->getOperand(0).getNode();
7150 SDNode *N1 = N->getOperand(1).getNode();
7151 return N0->hasOneUse() && N1->hasOneUse() &&
7152 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7153 }
7154 return false;
7155}
7156
7157static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7158 // Multiplications are only custom-lowered for 128-bit vectors so that
7159 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
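// For example, (mul (sext v4i16 %a to v4i32), (sext v4i16 %b to v4i32)) is
// lowered to ARMISD::VMULLs on the two v4i16 operands, which selects to a
// single vmull.s16 producing the v4i32 result directly (%a and %b are
// illustrative names).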
7160 EVT VT = Op.getValueType();
7161 assert(VT.is128BitVector() && VT.isInteger() &&
7162 "unexpected type for custom-lowering ISD::MUL");
7163 SDNode *N0 = Op.getOperand(0).getNode();
7164 SDNode *N1 = Op.getOperand(1).getNode();
7165 unsigned NewOpc = 0;
7166 bool isMLA = false;
7167 bool isN0SExt = isSignExtended(N0, DAG);
7168 bool isN1SExt = isSignExtended(N1, DAG);
7169 if (isN0SExt && isN1SExt)
7170 NewOpc = ARMISD::VMULLs;
7171 else {
7172 bool isN0ZExt = isZeroExtended(N0, DAG);
7173 bool isN1ZExt = isZeroExtended(N1, DAG);
7174 if (isN0ZExt && isN1ZExt)
7175 NewOpc = ARMISD::VMULLu;
7176 else if (isN1SExt || isN1ZExt) {
7177 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7178 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7179 if (isN1SExt && isAddSubSExt(N0, DAG)) {
7180 NewOpc = ARMISD::VMULLs;
7181 isMLA = true;
7182 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7183 NewOpc = ARMISD::VMULLu;
7184 isMLA = true;
7185 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7186 std::swap(N0, N1);
7187 NewOpc = ARMISD::VMULLu;
7188 isMLA = true;
7189 }
7190 }
7191
7192 if (!NewOpc) {
7193 if (VT == MVT::v2i64)
7194 // Fall through to expand this. It is not legal.
7195 return SDValue();
7196 else
7197 // Other vector multiplications are legal.
7198 return Op;
7199 }
7200 }
7201
7202 // Legalize to a VMULL instruction.
7203 SDLoc DL(Op);
7204 SDValue Op0;
7205 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7206 if (!isMLA) {
7207 Op0 = SkipExtensionForVMULL(N0, DAG);
7208 assert(Op0.getValueType().is64BitVector() &&
7209 Op1.getValueType().is64BitVector() &&
7210 "unexpected types for extended operands to VMULL");
7211 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7212 }
7213
7214 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
7215 // isel lowering to take advantage of no-stall back to back vmul + vmla.
7216 // vmull q0, d4, d6
7217 // vmlal q0, d5, d6
7218 // is faster than
7219 // vaddl q0, d4, d5
7220 // vmovl q1, d6
7221 // vmul q0, q0, q1
7222 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7223 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7224 EVT Op1VT = Op1.getValueType();
7225 return DAG.getNode(N0->getOpcode(), DL, VT,
7226 DAG.getNode(NewOpc, DL, VT,
7227 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7228 DAG.getNode(NewOpc, DL, VT,
7229 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7230}
7231
7232static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7233 SelectionDAG &DAG) {
7234 // TODO: Should this propagate fast-math-flags?
7235
7236 // Convert to float
7237 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7238 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7239 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7240 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7241 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7242 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7243 // Get reciprocal estimate.
7244 // float4 recip = vrecpeq_f32(yf);
7245 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7246 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7247 Y);
7248 // Because char has a smaller range than uchar, we can actually get away
7249 // without any newton steps. This requires that we use a weird bias
7250 // of 0xb000, however (again, this has been exhaustively tested).
7251 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7252 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7253 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7254 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7255 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7256 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7257 // Convert back to short.
7258 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7259 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7260 return X;
7261}
7262
7263static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7264 SelectionDAG &DAG) {
7265 // TODO: Should this propagate fast-math-flags?
7266
7267 SDValue N2;
7268 // Convert to float.
7269 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7270 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7271 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7272 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7273 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7274 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7275
7276 // Use reciprocal estimate and one refinement step.
7277 // float4 recip = vrecpeq_f32(yf);
7278 // recip *= vrecpsq_f32(yf, recip);
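// (VRECPS computes 2 - a*b, so each "recip *= vrecps(yf, recip)" is one
// Newton-Raphson step x' = x * (2 - yf * x) converging toward 1/yf.)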
7279 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7280 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7281 N1);
7282 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7283 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7284 N1, N2);
7285 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7286 // Because short has a smaller range than ushort, we can actually get away
7287 // with only a single newton step. This requires that we use a weird bias
7288 // of 89, however (again, this has been exhaustively tested).
7289 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7290 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7291 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7292 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7293 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7294 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7295 // Convert back to integer and return.
7296 // return vmovn_s32(vcvt_s32_f32(result));
7297 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7298 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7299 return N0;
7300}
7301
7302static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7303 EVT VT = Op.getValueType();
7304 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7305 "unexpected type for custom-lowering ISD::SDIV");
7306
7307 SDLoc dl(Op);
7308 SDValue N0 = Op.getOperand(0);
7309 SDValue N1 = Op.getOperand(1);
7310 SDValue N2, N3;
7311
7312 if (VT == MVT::v8i8) {
7313 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7314 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7315
7316 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7317 DAG.getIntPtrConstant(4, dl));
7318 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7319 DAG.getIntPtrConstant(4, dl));
7320 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7321 DAG.getIntPtrConstant(0, dl));
7322 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7323 DAG.getIntPtrConstant(0, dl));
7324
7325 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7326 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7327
7328 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7329 N0 = LowerCONCAT_VECTORS(N0, DAG);
7330
7331 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7332 return N0;
7333 }
7334 return LowerSDIV_v4i16(N0, N1, dl, DAG);
7335}
7336
7337static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7338 // TODO: Should this propagate fast-math-flags?
7339 EVT VT = Op.getValueType();
7340 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7341 "unexpected type for custom-lowering ISD::UDIV");
7342
7343 SDLoc dl(Op);
7344 SDValue N0 = Op.getOperand(0);
7345 SDValue N1 = Op.getOperand(1);
7346 SDValue N2, N3;
7347
7348 if (VT == MVT::v8i8) {
7349 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7350 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7351
7352 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7353 DAG.getIntPtrConstant(4, dl));
7354 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7355 DAG.getIntPtrConstant(4, dl));
7356 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7357 DAG.getIntPtrConstant(0, dl));
7358 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7359 DAG.getIntPtrConstant(0, dl));
7360
7361 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7362 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7363
7364 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7365 N0 = LowerCONCAT_VECTORS(N0, DAG);
7366
7367 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7368 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7369 MVT::i32),
7370 N0);
7371 return N0;
7372 }
7373
7374 // v4i16 udiv ... Convert to float.
7375 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7376 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7377 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7378 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7379 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7380 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7381
7382 // Use reciprocal estimate and two refinement steps.
7383 // float4 recip = vrecpeq_f32(yf);
7384 // recip *= vrecpsq_f32(yf, recip);
7385 // recip *= vrecpsq_f32(yf, recip);
7386 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7387 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7388 BN1);
7389 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7390 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7391 BN1, N2);
7392 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7393 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7394 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7395 BN1, N2);
7396 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7397 // Simply multiplying by the reciprocal estimate can leave us a few ulps
7398 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7399 // and that it will never cause us to return an answer too large).
7400 // float4 result = as_float4(as_int4(xf*recip) + 2);
7401 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7402 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7403 N1 = DAG.getConstant(2, dl, MVT::v4i32);
7404 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7405 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7406 // Convert back to integer and return.
7407 // return vmovn_u32(vcvt_s32_f32(result));
7408 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7409 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7410 return N0;
7411}
7412
7413static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
7414 EVT VT = Op.getNode()->getValueType(0);
7415 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7416
7417 unsigned Opc;
7418 bool ExtraOp = false;
7419 switch (Op.getOpcode()) {
7420 default: llvm_unreachable("Invalid code");
7421 case ISD::ADDC: Opc = ARMISD::ADDC; break;
7422 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
7423 case ISD::SUBC: Opc = ARMISD::SUBC; break;
7424 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
7425 }
7426
7427 if (!ExtraOp)
7428 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7429 Op.getOperand(1));
7430 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7431 Op.getOperand(1), Op.getOperand(2));
7432}
7433
7434static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
7435 SDNode *N = Op.getNode();
7436 EVT VT = N->getValueType(0);
7437 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7438
7439 SDValue Carry = Op.getOperand(2);
7440 EVT CarryVT = Carry.getValueType();
7441
7442 SDLoc DL(Op);
7443
7444 APInt NegOne = APInt::getAllOnesValue(CarryVT.getScalarSizeInBits());
7445
7446 SDValue Result;
7447 if (Op.getOpcode() == ISD::ADDCARRY) {
7448 // This converts the boolean value carry into the carry flag.
7449 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7450
7451 // Do the addition proper using the carry flag we wanted.
7452 Result = DAG.getNode(ARMISD::ADDE, DL, VTs, Op.getOperand(0),
7453 Op.getOperand(1), Carry.getValue(1));
7454
7455 // Now convert the carry flag into a boolean value.
7456 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7457 } else {
7458 // ARMISD::SUBE expects a carry not a borrow like ISD::SUBCARRY so we
7459 // have to invert the carry first.
7460 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7461 DAG.getConstant(1, DL, MVT::i32), Carry);
7462 // This converts the boolean value carry into the carry flag.
7463 Carry = ConvertBooleanCarryToCarryFlag(Carry, DAG);
7464
7465 // Do the subtraction proper using the carry flag we wanted.
7466 Result = DAG.getNode(ARMISD::SUBE, DL, VTs, Op.getOperand(0),
7467 Op.getOperand(1), Carry.getValue(1));
7468
7469 // Now convert the carry flag into a boolean value.
7470 Carry = ConvertCarryFlagToBooleanCarry(Result.getValue(1), VT, DAG);
7471 // But the carry returned by ARMISD::SUBE is not a borrow as expected
7472 // by ISD::SUBCARRY, so compute 1 - C.
7473 Carry = DAG.getNode(ISD::SUB, DL, MVT::i32,
7474 DAG.getConstant(1, DL, MVT::i32), Carry);
7475 }
7476
7477 // Return both values.
7478 return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Result, Carry);
7479}
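// A plain-C++ sketch (hypothetical, not part of this file) of the carry/borrow
// conversion performed by the SUBCARRY path above: ISD::SUBCARRY speaks in
// borrows, while ARMISD::SUBE consumes and produces an ARM-style carry, and
// the two are related by carry = 1 - borrow on both input and output.
#include <cstdint>

struct SubResult { std::uint32_t Value; std::uint32_t BorrowOut; };

static SubResult subWithBorrow(std::uint32_t A, std::uint32_t B,
                               std::uint32_t BorrowIn) {
  std::uint32_t CarryIn = 1u - BorrowIn;                      // borrow -> ARM carry
  std::uint64_t Wide = std::uint64_t(A) +
                       std::uint64_t(std::uint32_t(~B)) + CarryIn; // SBC: A + ~B + C
  std::uint32_t CarryOut = std::uint32_t(Wide >> 32);         // ARM carry after SBC
  return {std::uint32_t(Wide), 1u - CarryOut};                // ARM carry -> borrow
}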
7480
7481SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7482   assert(Subtarget->isTargetDarwin());
7483
7484 // For iOS, we want to call an alternative entry point: __sincos_stret,
7485 // return values are passed via sret.
7486 SDLoc dl(Op);
7487 SDValue Arg = Op.getOperand(0);
7488 EVT ArgVT = Arg.getValueType();
7489 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7490 auto PtrVT = getPointerTy(DAG.getDataLayout());
7491
7492 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7493 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7494
7495 // Pair of floats / doubles used to pass the result.
7496 Type *RetTy = StructType::get(ArgTy, ArgTy);
7497 auto &DL = DAG.getDataLayout();
7498
7499 ArgListTy Args;
7500 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7501 SDValue SRet;
7502 if (ShouldUseSRet) {
7503 // Create stack object for sret.
7504 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7505 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7506 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7507 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7508
7509 ArgListEntry Entry;
7510 Entry.Node = SRet;
7511 Entry.Ty = RetTy->getPointerTo();
7512 Entry.IsSExt = false;
7513 Entry.IsZExt = false;
7514 Entry.IsSRet = true;
7515 Args.push_back(Entry);
7516 RetTy = Type::getVoidTy(*DAG.getContext());
7517 }
7518
7519 ArgListEntry Entry;
7520 Entry.Node = Arg;
7521 Entry.Ty = ArgTy;
7522 Entry.IsSExt = false;
7523 Entry.IsZExt = false;
7524 Args.push_back(Entry);
7525
7526 const char *LibcallName =
7527 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
7528 RTLIB::Libcall LC =
7529 (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
7530 CallingConv::ID CC = getLibcallCallingConv(LC);
7531 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7532
7533 TargetLowering::CallLoweringInfo CLI(DAG);
7534 CLI.setDebugLoc(dl)
7535 .setChain(DAG.getEntryNode())
7536 .setCallee(CC, RetTy, Callee, std::move(Args))
7537 .setDiscardResult(ShouldUseSRet);
7538 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7539
7540 if (!ShouldUseSRet)
7541 return CallResult.first;
7542
7543 SDValue LoadSin =
7544 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7545
7546 // Address of cos field.
7547 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7548 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7549 SDValue LoadCos =
7550 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7551
7552 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7553 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7554 LoadSin.getValue(0), LoadCos.getValue(0));
7555}
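// A hedged sketch (not part of this file) of the call shape this lowering
// produces when the APCS ABI forces an sret return: the sin/cos pair lives in
// a stack slot whose address is passed first, and the two fields are then
// loaded from offsets 0 and sizeof(arg). sincosf_stret_ref is a hypothetical
// stand-in for the __sincosf_stret entry point; the real routine's return
// convention depends on the target ABI.
#include <cmath>

struct SinCosF { float Sin, Cos; };

static void sincosf_stret_ref(SinCosF *Ret, float X) {
  Ret->Sin = std::sin(X);      // result written through the sret pointer
  Ret->Cos = std::cos(X);
}

static void sincosViaSRet(float X, float &S, float &C) {
  SinCosF Tmp;                 // stands in for the CreateStackObject frame index
  sincosf_stret_ref(&Tmp, X);  // sret pointer first, then the argument
  S = Tmp.Sin;                 // load at offset 0
  C = Tmp.Cos;                 // load at offset ArgVT.getStoreSize()
}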
7556
7557SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7558 bool Signed,
7559 SDValue &Chain) const {
7560 EVT VT = Op.getValueType();
7561   assert((VT == MVT::i32 || VT == MVT::i64) &&
7562          "unexpected type for custom lowering DIV");
7563 SDLoc dl(Op);
7564
7565 const auto &DL = DAG.getDataLayout();
7566 const auto &TLI = DAG.getTargetLoweringInfo();
7567
7568 const char *Name = nullptr;
7569 if (Signed)
7570 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7571 else
7572 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7573
7574 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7575
7576 ARMTargetLowering::ArgListTy Args;
7577
7578 for (auto AI : {1, 0}) {
7579 ArgListEntry Arg;
7580 Arg.Node = Op.getOperand(AI);
7581 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7582 Args.push_back(Arg);
7583 }
7584
7585 CallLoweringInfo CLI(DAG);
7586 CLI.setDebugLoc(dl)
7587 .setChain(Chain)
7588 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7589 ES, std::move(Args));
7590
7591 return LowerCallTo(CLI).first;
7592}
7593
7594SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7595 bool Signed) const {
7596   assert(Op.getValueType() == MVT::i32 &&
7597          "unexpected type for custom lowering DIV");
7598 SDLoc dl(Op);
7599
7600 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7601 DAG.getEntryNode(), Op.getOperand(1));
7602
7603 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7604}
7605
7606static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7607 SDLoc DL(N);
7608 SDValue Op = N->getOperand(1);
7609 if (N->getValueType(0) == MVT::i32)
7610 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7611 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7612 DAG.getConstant(0, DL, MVT::i32));
7613 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7614 DAG.getConstant(1, DL, MVT::i32));
7615 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7616 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7617}
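// A short sketch (hypothetical, not part of this file) of the zero test
// WinDBZCheckDenominator builds for 64-bit denominators: OR-ing the low and
// high halves is zero exactly when the full 64-bit value is zero, so a single
// 32-bit value can feed the ARMISD::WIN__DBZCHK node.
#include <cstdint>

static bool denominatorIsZero64(std::uint64_t Denom) {
  std::uint32_t Lo = std::uint32_t(Denom);
  std::uint32_t Hi = std::uint32_t(Denom >> 32);
  return (Lo | Hi) == 0;  // zero iff both halves are zero
}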
7618
7619void ARMTargetLowering::ExpandDIV_Windows(
7620 SDValue Op, SelectionDAG &DAG, bool Signed,
7621 SmallVectorImpl<SDValue> &Results) const {
7622 const auto &DL = DAG.getDataLayout();
7623 const auto &TLI = DAG.getTargetLoweringInfo();
7624
7625   assert(Op.getValueType() == MVT::i64 &&
7626          "unexpected type for custom lowering DIV");
7627 SDLoc dl(Op);
7628
7629 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7630
7631 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7632
7633 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7634 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7635 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7636 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7637
7638 Results.push_back(Lower);
7639 Results.push_back(Upper);
7640}
7641
7642static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7643 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7644 // Acquire/Release load/store is not legal for targets without a dmb or
7645 // equivalent available.
7646 return SDValue();
7647
7648 // Monotonic load/store is legal for all targets.
7649 return Op;
7650}
7651
7652static void ReplaceREADCYCLECOUNTER(SDNode *N,
7653 SmallVectorImpl<SDValue> &Results,
7654 SelectionDAG &DAG,
7655 const ARMSubtarget *Subtarget) {
7656 SDLoc DL(N);
7657 // Under Power Management extensions, the cycle-count is:
7658 // mrc p15, #0, <Rt>, c9, c13, #0
7659 SDValue Ops[] = { N->getOperand(0), // Chain
7660 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7661 DAG.getConstant(15, DL, MVT::i32),
7662 DAG.getConstant(0, DL, MVT::i32),
7663 DAG.getConstant(9, DL, MVT::i32),
7664 DAG.getConstant(13, DL, MVT::i32),
7665 DAG.getConstant(0, DL, MVT::i32)
7666 };
7667
7668 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7669 DAG.getVTList(MVT::i32, MVT::Other), Ops);
7670 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7671 DAG.getConstant(0, DL, MVT::i32)));
7672 Results.push_back(Cycles32.getValue(1));
7673}
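// A hedged sketch (not part of this file) of the coprocessor read the lowering
// above emits for READCYCLECOUNTER: with the Performance Monitors extension,
// PMCCNTR is read with "mrc p15, 0, <Rt>, c9, c13, 0" and the i64 result is a
// BUILD_PAIR with a zero high word. Inline asm shown for illustration only;
// it compiles only for 32-bit ARM targets.
#include <cstdint>

static inline std::uint64_t readCycleCounter32() {
  std::uint32_t Cycles32;
  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(Cycles32));
  return std::uint64_t(Cycles32);  // high 32 bits are zero
}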
7674
7675static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7676 SDLoc dl(V.getNode());
7677 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7678 SDValue VHi = DAG.getAnyExtOrTrunc(
7679 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7680 dl, MVT::i32);
7681 bool isBigEndian = DAG.getDataLayout().isBigEndian();
7682 if (isBigEndian)
7683 std::swap (VLo, VHi);
7684 SDValue RegClass =
7685 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7686 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7687 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7688 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7689 return SDValue(
7690 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7691}
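// A plain-C++ sketch (hypothetical, not part of this file) of the split
// createGPRPairNode performs before building the REG_SEQUENCE: the low 32 bits
// feed gsub_0 and the high 32 bits feed gsub_1, with the pair swapped on
// big-endian targets so the register pair matches the in-memory layout.
#include <cstdint>
#include <utility>

static std::pair<std::uint32_t, std::uint32_t>
splitToGPRPair(std::uint64_t V, bool IsBigEndian) {
  std::uint32_t Lo = std::uint32_t(V);        // -> gsub_0 on little-endian
  std::uint32_t Hi = std::uint32_t(V >> 32);  // -> gsub_1 on little-endian
  if (IsBigEndian)
    std::swap(Lo, Hi);
  return {Lo, Hi};
}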
7692
7693static void ReplaceCMP_SWAP_64Results(SDNode *N,
7694 SmallVectorImpl<SDValue> & Results,
7695 SelectionDAG &DAG) {
7696   assert(N->getValueType(0) == MVT::i64 &&
7697          "AtomicCmpSwap on types less than 64 should be legal");
7698 SDValue Ops[] = {N->getOperand(1),
7699 createGPRPairNode(DAG, N->getOperand(2)),
7700 createGPRPairNode(DAG, N->getOperand(3)),
7701 N->getOperand(0)};
7702 SDNode *CmpSwap = DAG.getMachineNode(
7703 ARM::CMP_SWAP_64, SDLoc(N),
7704 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7705
7706 MachineFunction &MF = DAG.getMachineFunction();
7707 MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7708 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7709 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7710
7711 bool isBigEndian = DAG.getDataLayout().isBigEndian();
7712
7713 Results.push_back(
7714 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
7715 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7716 Results.push_back(
7717 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
7718 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7719 Results.push_back(SDValue(CmpSwap, 2));
7720}
7721
7722static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7723 SelectionDAG &DAG) {
7724 const auto &TLI = DAG.getTargetLoweringInfo();
7725
7726   assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7727          "Custom lowering is MSVCRT specific!");
7728
7729 SDLoc dl(Op);
7730 SDValue Val = Op.getOperand(0);
7731 MVT Ty = Val->getSimpleValueType(0);
7732 SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7733 SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7734 TLI.getPointerTy(DAG.getDataLayout()));
7735
7736 TargetLowering::ArgListTy Args;
7737 TargetLowering::ArgListEntry Entry;