Bug Summary

File: lib/Target/ARM/ARMISelLowering.cpp
Warning: line 6734, column 14
1st function call argument is an uninitialized value
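
The flagged call site at line 6734 lies beyond the excerpt reproduced below, so the report cannot be judged from this listing alone. As a rough, hypothetical sketch of the pattern this checker reports (all names below are invented and are not the ARM lowering code), the analyzer fires when a local variable can reach a call argument without having been assigned on every path:

    // Hypothetical illustration only -- not ARMISelLowering.cpp:6734.
    #include <cstdio>

    static void consume(int Value) {   // the "1st function call argument"
      std::printf("%d\n", Value);
    }

    static void example(bool Cond) {
      int Value;                       // no initializer
      if (Cond)
        Value = 42;                    // assigned on only one path
      consume(Value);                  // warning: argument may be uninitialized
    }

Initializing the variable at its declaration, or ensuring every path assigns it before the call, silences this class of report.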

Annotated Source Code

/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp

1//===- ARMISelLowering.cpp - ARM DAG Lowering Implementation --------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARMISelLowering.h"
16#include "ARMBaseInstrInfo.h"
17#include "ARMBaseRegisterInfo.h"
18#include "ARMCallingConv.h"
19#include "ARMConstantPoolValue.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSelectionDAGInfo.h"
24#include "ARMSubtarget.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "Utils/ARMBaseInfo.h"
28#include "llvm/ADT/APFloat.h"
29#include "llvm/ADT/APInt.h"
30#include "llvm/ADT/ArrayRef.h"
31#include "llvm/ADT/BitVector.h"
32#include "llvm/ADT/DenseMap.h"
33#include "llvm/ADT/STLExtras.h"
34#include "llvm/ADT/SmallPtrSet.h"
35#include "llvm/ADT/SmallVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringExtras.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/StringSwitch.h"
40#include "llvm/ADT/Triple.h"
41#include "llvm/ADT/Twine.h"
42#include "llvm/Analysis/VectorUtils.h"
43#include "llvm/CodeGen/CallingConvLower.h"
44#include "llvm/CodeGen/ISDOpcodes.h"
45#include "llvm/CodeGen/IntrinsicLowering.h"
46#include "llvm/CodeGen/MachineBasicBlock.h"
47#include "llvm/CodeGen/MachineConstantPool.h"
48#include "llvm/CodeGen/MachineFrameInfo.h"
49#include "llvm/CodeGen/MachineFunction.h"
50#include "llvm/CodeGen/MachineInstr.h"
51#include "llvm/CodeGen/MachineInstrBuilder.h"
52#include "llvm/CodeGen/MachineJumpTableInfo.h"
53#include "llvm/CodeGen/MachineMemOperand.h"
54#include "llvm/CodeGen/MachineOperand.h"
55#include "llvm/CodeGen/MachineRegisterInfo.h"
56#include "llvm/CodeGen/MachineValueType.h"
57#include "llvm/CodeGen/RuntimeLibcalls.h"
58#include "llvm/CodeGen/SelectionDAG.h"
59#include "llvm/CodeGen/SelectionDAGNodes.h"
60#include "llvm/CodeGen/TargetInstrInfo.h"
61#include "llvm/CodeGen/TargetLowering.h"
62#include "llvm/CodeGen/TargetOpcodes.h"
63#include "llvm/CodeGen/TargetRegisterInfo.h"
64#include "llvm/CodeGen/TargetSubtargetInfo.h"
65#include "llvm/CodeGen/ValueTypes.h"
66#include "llvm/IR/Attributes.h"
67#include "llvm/IR/CallingConv.h"
68#include "llvm/IR/Constant.h"
69#include "llvm/IR/Constants.h"
70#include "llvm/IR/DataLayout.h"
71#include "llvm/IR/DebugLoc.h"
72#include "llvm/IR/DerivedTypes.h"
73#include "llvm/IR/Function.h"
74#include "llvm/IR/GlobalAlias.h"
75#include "llvm/IR/GlobalValue.h"
76#include "llvm/IR/GlobalVariable.h"
77#include "llvm/IR/IRBuilder.h"
78#include "llvm/IR/InlineAsm.h"
79#include "llvm/IR/Instruction.h"
80#include "llvm/IR/Instructions.h"
81#include "llvm/IR/IntrinsicInst.h"
82#include "llvm/IR/Intrinsics.h"
83#include "llvm/IR/Module.h"
84#include "llvm/IR/Type.h"
85#include "llvm/IR/User.h"
86#include "llvm/IR/Value.h"
87#include "llvm/MC/MCInstrDesc.h"
88#include "llvm/MC/MCInstrItineraries.h"
89#include "llvm/MC/MCRegisterInfo.h"
90#include "llvm/MC/MCSchedule.h"
91#include "llvm/Support/AtomicOrdering.h"
92#include "llvm/Support/BranchProbability.h"
93#include "llvm/Support/Casting.h"
94#include "llvm/Support/CodeGen.h"
95#include "llvm/Support/CommandLine.h"
96#include "llvm/Support/Compiler.h"
97#include "llvm/Support/Debug.h"
98#include "llvm/Support/ErrorHandling.h"
99#include "llvm/Support/KnownBits.h"
100#include "llvm/Support/MathExtras.h"
101#include "llvm/Support/raw_ostream.h"
102#include "llvm/Target/TargetMachine.h"
103#include "llvm/Target/TargetOptions.h"
104#include <algorithm>
105#include <cassert>
106#include <cstdint>
107#include <cstdlib>
108#include <iterator>
109#include <limits>
110#include <string>
111#include <tuple>
112#include <utility>
113#include <vector>
114
115using namespace llvm;
116
117#define DEBUG_TYPE "arm-isel"
118
119STATISTIC(NumTailCalls, "Number of tail calls");
120STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
121STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
122STATISTIC(NumConstpoolPromoted,
123          "Number of constants with their storage promoted into constant pools");
124
125static cl::opt<bool>
126ARMInterworking("arm-interworking", cl::Hidden,
127 cl::desc("Enable / disable ARM interworking (for debugging only)"),
128 cl::init(true));
129
130static cl::opt<bool> EnableConstpoolPromotion(
131 "arm-promote-constant", cl::Hidden,
132 cl::desc("Enable / disable promotion of unnamed_addr constants into "
133 "constant pools"),
134 cl::init(false)); // FIXME: set to true by default once PR32780 is fixed
135static cl::opt<unsigned> ConstpoolPromotionMaxSize(
136 "arm-promote-constant-max-size", cl::Hidden,
137 cl::desc("Maximum size of constant to promote into a constant pool"),
138 cl::init(64));
139static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
140 "arm-promote-constant-max-total", cl::Hidden,
141 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
142 cl::init(128));
143
144// The APCS parameter registers.
145static const MCPhysReg GPRArgRegs[] = {
146 ARM::R0, ARM::R1, ARM::R2, ARM::R3
147};
148
149void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
150 MVT PromotedBitwiseVT) {
151 if (VT != PromotedLdStVT) {
152 setOperationAction(ISD::LOAD, VT, Promote);
153 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
154
155 setOperationAction(ISD::STORE, VT, Promote);
156 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
157 }
158
159 MVT ElemTy = VT.getVectorElementType();
160 if (ElemTy != MVT::f64)
161 setOperationAction(ISD::SETCC, VT, Custom);
162 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
163 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
164 if (ElemTy == MVT::i32) {
165 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
166 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
167 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
168 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
169 } else {
170 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
171 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
172 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
173 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
174 }
175 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
176 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
177 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
178 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
179 setOperationAction(ISD::SELECT, VT, Expand);
180 setOperationAction(ISD::SELECT_CC, VT, Expand);
181 setOperationAction(ISD::VSELECT, VT, Expand);
182 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
183 if (VT.isInteger()) {
184 setOperationAction(ISD::SHL, VT, Custom);
185 setOperationAction(ISD::SRA, VT, Custom);
186 setOperationAction(ISD::SRL, VT, Custom);
187 }
188
189 // Promote all bit-wise operations.
190 if (VT.isInteger() && VT != PromotedBitwiseVT) {
191 setOperationAction(ISD::AND, VT, Promote);
192 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
193 setOperationAction(ISD::OR, VT, Promote);
194 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
195 setOperationAction(ISD::XOR, VT, Promote);
196 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
197 }
198
199 // Neon does not support vector divide/remainder operations.
200 setOperationAction(ISD::SDIV, VT, Expand);
201 setOperationAction(ISD::UDIV, VT, Expand);
202 setOperationAction(ISD::FDIV, VT, Expand);
203 setOperationAction(ISD::SREM, VT, Expand);
204 setOperationAction(ISD::UREM, VT, Expand);
205 setOperationAction(ISD::FREM, VT, Expand);
206
207 if (!VT.isFloatingPoint() &&
208 VT != MVT::v2i64 && VT != MVT::v1i64)
209 for (auto Opcode : {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
210 setOperationAction(Opcode, VT, Legal);
211}
212
213void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
214 addRegisterClass(VT, &ARM::DPRRegClass);
215 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
216}
217
218void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
219 addRegisterClass(VT, &ARM::DPairRegClass);
220 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
221}
222
223ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
224 const ARMSubtarget &STI)
225 : TargetLowering(TM), Subtarget(&STI) {
226 RegInfo = Subtarget->getRegisterInfo();
227 Itins = Subtarget->getInstrItineraryData();
228
229 setBooleanContents(ZeroOrOneBooleanContent);
230 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
231
232 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
233 !Subtarget->isTargetWatchOS()) {
234 bool IsHFTarget = TM.Options.FloatABIType == FloatABI::Hard;
235 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
236 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
237 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
238 : CallingConv::ARM_AAPCS);
239 }
240
241 if (Subtarget->isTargetMachO()) {
242 // Uses VFP for Thumb libfuncs if available.
243 if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
244 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
245 static const struct {
246 const RTLIB::Libcall Op;
247 const char * const Name;
248 const ISD::CondCode Cond;
249 } LibraryCalls[] = {
250 // Single-precision floating-point arithmetic.
251 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
252 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
253 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
254 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
255
256 // Double-precision floating-point arithmetic.
257 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
258 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
259 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
260 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
261
262 // Single-precision comparisons.
263 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
264 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
265 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
266 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
267 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
268 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
269 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
270 { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
271
272 // Double-precision comparisons.
273 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
274 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
275 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
276 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
277 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
278 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
279 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
280 { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
281
282 // Floating-point to integer conversions.
283 // i64 conversions are done via library routines even when generating VFP
284 // instructions, so use the same ones.
285 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
286 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
287 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
288 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
289
290 // Conversions between floating types.
291 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
292 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
293
294 // Integer to floating-point conversions.
295 // i64 conversions are done via library routines even when generating VFP
296 // instructions, so use the same ones.
297 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
298 // e.g., __floatunsidf vs. __floatunssidfvfp.
299 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
300 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
301 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
302 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
303 };
304
305 for (const auto &LC : LibraryCalls) {
306 setLibcallName(LC.Op, LC.Name);
307 if (LC.Cond != ISD::SETCC_INVALID)
308 setCmpLibcallCC(LC.Op, LC.Cond);
309 }
310 }
311
312 // Set the correct calling convention for ARMv7k WatchOS. It's just
313 // AAPCS_VFP for functions as simple as libcalls.
314 if (Subtarget->isTargetWatchABI()) {
315 for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
316 setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
317 }
318 }
319
320 // These libcalls are not available in 32-bit.
321 setLibcallName(RTLIB::SHL_I128, nullptr);
322 setLibcallName(RTLIB::SRL_I128, nullptr);
323 setLibcallName(RTLIB::SRA_I128, nullptr);
324
325 // RTLIB
326 if (Subtarget->isAAPCS_ABI() &&
327 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
328 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
329 static const struct {
330 const RTLIB::Libcall Op;
331 const char * const Name;
332 const CallingConv::ID CC;
333 const ISD::CondCode Cond;
334 } LibraryCalls[] = {
335 // Double-precision floating-point arithmetic helper functions
336 // RTABI chapter 4.1.2, Table 2
337 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
338 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
339 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341
342 // Double-precision floating-point comparison helper functions
343 // RTABI chapter 4.1.2, Table 3
344 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
345 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
346 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
347 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
348 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
349 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
350 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
351 { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
352
353 // Single-precision floating-point arithmetic helper functions
354 // RTABI chapter 4.1.2, Table 4
355 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
356 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
357 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359
360 // Single-precision floating-point comparison helper functions
361 // RTABI chapter 4.1.2, Table 5
362 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
363 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
364 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
365 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
366 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
367 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
368 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
369 { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
370
371 // Floating-point to integer conversions.
372 // RTABI chapter 4.1.2, Table 6
373 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
374 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
375 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381
382 // Conversions between floating types.
383 // RTABI chapter 4.1.2, Table 7
384 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
385 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
386 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387
388 // Integer to floating-point conversions.
389 // RTABI chapter 4.1.2, Table 8
390 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
391 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
392 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398
399 // Long long helper functions
400 // RTABI chapter 4.2, Table 9
401 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
402 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
403 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405
406 // Integer division functions
407 // RTABI chapter 4.3.1
408 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
409 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
410 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416 };
417
418 for (const auto &LC : LibraryCalls) {
419 setLibcallName(LC.Op, LC.Name);
420 setLibcallCallingConv(LC.Op, LC.CC);
421 if (LC.Cond != ISD::SETCC_INVALID)
422 setCmpLibcallCC(LC.Op, LC.Cond);
423 }
424
425 // EABI dependent RTLIB
426 if (TM.Options.EABIVersion == EABI::EABI4 ||
427 TM.Options.EABIVersion == EABI::EABI5) {
428 static const struct {
429 const RTLIB::Libcall Op;
430 const char *const Name;
431 const CallingConv::ID CC;
432 const ISD::CondCode Cond;
433 } MemOpsLibraryCalls[] = {
434 // Memory operations
435 // RTABI chapter 4.3.4
436 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
437 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
438 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439 };
440
441 for (const auto &LC : MemOpsLibraryCalls) {
442 setLibcallName(LC.Op, LC.Name);
443 setLibcallCallingConv(LC.Op, LC.CC);
444 if (LC.Cond != ISD::SETCC_INVALID)
445 setCmpLibcallCC(LC.Op, LC.Cond);
446 }
447 }
448 }
449
450 if (Subtarget->isTargetWindows()) {
451 static const struct {
452 const RTLIB::Libcall Op;
453 const char * const Name;
454 const CallingConv::ID CC;
455 } LibraryCalls[] = {
456 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
457 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
458 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
459 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
460 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
461 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
462 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
463 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
464 };
465
466 for (const auto &LC : LibraryCalls) {
467 setLibcallName(LC.Op, LC.Name);
468 setLibcallCallingConv(LC.Op, LC.CC);
469 }
470 }
471
472 // Use divmod compiler-rt calls for iOS 5.0 and later.
473 if (Subtarget->isTargetMachO() &&
474 !(Subtarget->isTargetIOS() &&
475 Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
476 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
477 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
478 }
479
480 // The half <-> float conversion functions are always soft-float on
481 // non-watchos platforms, but are needed for some targets which use a
482 // hard-float calling convention by default.
483 if (!Subtarget->isTargetWatchABI()) {
484 if (Subtarget->isAAPCS_ABI()) {
485 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
486 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
487 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
488 } else {
489 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
490 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
491 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
492 }
493 }
494
495 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
496 // a __gnu_ prefix (which is the default).
497 if (Subtarget->isTargetAEABI()) {
498 static const struct {
499 const RTLIB::Libcall Op;
500 const char * const Name;
501 const CallingConv::ID CC;
502 } LibraryCalls[] = {
503 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
504 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
505 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
506 };
507
508 for (const auto &LC : LibraryCalls) {
509 setLibcallName(LC.Op, LC.Name);
510 setLibcallCallingConv(LC.Op, LC.CC);
511 }
512 }
513
514 if (Subtarget->isThumb1Only())
515 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
516 else
517 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
518
519 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
520 !Subtarget->isThumb1Only()) {
521 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
522 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
523 }
524
525 for (MVT VT : MVT::vector_valuetypes()) {
526 for (MVT InnerVT : MVT::vector_valuetypes()) {
527 setTruncStoreAction(VT, InnerVT, Expand);
528 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
529 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
530 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
531 }
532
533 setOperationAction(ISD::MULHS, VT, Expand);
534 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
535 setOperationAction(ISD::MULHU, VT, Expand);
536 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
537
538 setOperationAction(ISD::BSWAP, VT, Expand);
539 }
540
541 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
542 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
543
544 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
545 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
546
547 if (Subtarget->hasNEON()) {
548 addDRTypeForNEON(MVT::v2f32);
549 addDRTypeForNEON(MVT::v8i8);
550 addDRTypeForNEON(MVT::v4i16);
551 addDRTypeForNEON(MVT::v2i32);
552 addDRTypeForNEON(MVT::v1i64);
553
554 addQRTypeForNEON(MVT::v4f32);
555 addQRTypeForNEON(MVT::v2f64);
556 addQRTypeForNEON(MVT::v16i8);
557 addQRTypeForNEON(MVT::v8i16);
558 addQRTypeForNEON(MVT::v4i32);
559 addQRTypeForNEON(MVT::v2i64);
560
561 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
562 // neither Neon nor VFP support any arithmetic operations on it.
563 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
564 // supported for v4f32.
565 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
566 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
567 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
568 // FIXME: Code duplication: FDIV and FREM are expanded always, see
569 // ARMTargetLowering::addTypeForNEON method for details.
570 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
571 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
572 // FIXME: Create unittest.
573    // In other words, find a case where "copysign" appears in a DAG with vector
574 // operands.
575 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
576 // FIXME: Code duplication: SETCC has custom operation action, see
577 // ARMTargetLowering::addTypeForNEON method for details.
578 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
579 // FIXME: Create unittest for FNEG and for FABS.
580 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
581 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
582 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
583 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
584 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
585 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
586 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
587 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
588 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
589 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
590 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
591 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
592 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
593 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
594 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
595 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
596 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
597 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
598
599 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
600 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
601 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
602 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
603 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
604 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
605 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
606 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
607 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
608 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
609 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
610 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
611 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
612 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
613
614 // Mark v2f32 intrinsics.
615 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
616 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
617 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
618 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
619 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
620 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
621 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
622 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
623 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
624 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
625 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
626 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
627 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
628 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
629
630 // Neon does not support some operations on v1i64 and v2i64 types.
631 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
632 // Custom handling for some quad-vector types to detect VMULL.
633 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
634 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
635 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
636 // Custom handling for some vector types to avoid expensive expansions
637 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
638 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
639 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
640 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
641 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
642    // a destination type that is wider than the source, nor does
643 // it have a FP_TO_[SU]INT instruction with a narrower destination than
644 // source.
645 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
646 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
647 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
648 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
649
650 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
651 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
652
653 // NEON does not have single instruction CTPOP for vectors with element
654 // types wider than 8-bits. However, custom lowering can leverage the
655 // v8i8/v16i8 vcnt instruction.
656 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
657 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
658 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
659 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
660 setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
661 setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
662
663 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
664 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
665
666 // NEON does not have single instruction CTTZ for vectors.
667 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
668 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
669 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
670 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
671
672 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
673 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
674 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
675 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
676
677 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
678 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
679 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
680 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
681
682 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
683 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
684 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
685 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
686
687 // NEON only has FMA instructions as of VFP4.
688 if (!Subtarget->hasVFP4()) {
689 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
690 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
691 }
692
693 setTargetDAGCombine(ISD::INTRINSIC_VOID);
694 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
695 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
696 setTargetDAGCombine(ISD::SHL);
697 setTargetDAGCombine(ISD::SRL);
698 setTargetDAGCombine(ISD::SRA);
699 setTargetDAGCombine(ISD::SIGN_EXTEND);
700 setTargetDAGCombine(ISD::ZERO_EXTEND);
701 setTargetDAGCombine(ISD::ANY_EXTEND);
702 setTargetDAGCombine(ISD::BUILD_VECTOR);
703 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
704 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
705 setTargetDAGCombine(ISD::STORE);
706 setTargetDAGCombine(ISD::FP_TO_SINT);
707 setTargetDAGCombine(ISD::FP_TO_UINT);
708 setTargetDAGCombine(ISD::FDIV);
709 setTargetDAGCombine(ISD::LOAD);
710
711 // It is legal to extload from v4i8 to v4i16 or v4i32.
712 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
713 MVT::v2i32}) {
714 for (MVT VT : MVT::integer_vector_valuetypes()) {
715 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
716 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
717 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
718 }
719 }
720 }
721
722 if (Subtarget->isFPOnlySP()) {
723 // When targeting a floating-point unit with only single-precision
724 // operations, f64 is legal for the few double-precision instructions which
725 // are present However, no double-precision operations other than moves,
726 // loads and stores are provided by the hardware.
727 setOperationAction(ISD::FADD, MVT::f64, Expand);
728 setOperationAction(ISD::FSUB, MVT::f64, Expand);
729 setOperationAction(ISD::FMUL, MVT::f64, Expand);
730 setOperationAction(ISD::FMA, MVT::f64, Expand);
731 setOperationAction(ISD::FDIV, MVT::f64, Expand);
732 setOperationAction(ISD::FREM, MVT::f64, Expand);
733 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
734 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
735 setOperationAction(ISD::FNEG, MVT::f64, Expand);
736 setOperationAction(ISD::FABS, MVT::f64, Expand);
737 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
738 setOperationAction(ISD::FSIN, MVT::f64, Expand);
739 setOperationAction(ISD::FCOS, MVT::f64, Expand);
740 setOperationAction(ISD::FPOW, MVT::f64, Expand);
741 setOperationAction(ISD::FLOG, MVT::f64, Expand);
742 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
743 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
744 setOperationAction(ISD::FEXP, MVT::f64, Expand);
745 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
746 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
747 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
748 setOperationAction(ISD::FRINT, MVT::f64, Expand);
749 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
750 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
751 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
752 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
753 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
754 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
755 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
756 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
757 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
758 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
759 }
760
761 computeRegisterProperties(Subtarget->getRegisterInfo());
762
763 // ARM does not have floating-point extending loads.
764 for (MVT VT : MVT::fp_valuetypes()) {
765 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
766 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
767 }
768
769 // ... or truncating stores
770 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
771 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
772 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
773
774 // ARM does not have i1 sign extending load.
775 for (MVT VT : MVT::integer_valuetypes())
776 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
777
778 // ARM supports all 4 flavors of integer indexed load / store.
779 if (!Subtarget->isThumb1Only()) {
780 for (unsigned im = (unsigned)ISD::PRE_INC;
781 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
782 setIndexedLoadAction(im, MVT::i1, Legal);
783 setIndexedLoadAction(im, MVT::i8, Legal);
784 setIndexedLoadAction(im, MVT::i16, Legal);
785 setIndexedLoadAction(im, MVT::i32, Legal);
786 setIndexedStoreAction(im, MVT::i1, Legal);
787 setIndexedStoreAction(im, MVT::i8, Legal);
788 setIndexedStoreAction(im, MVT::i16, Legal);
789 setIndexedStoreAction(im, MVT::i32, Legal);
790 }
791 } else {
792 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
793 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
794 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
795 }
796
797 setOperationAction(ISD::SADDO, MVT::i32, Custom);
798 setOperationAction(ISD::UADDO, MVT::i32, Custom);
799 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
800 setOperationAction(ISD::USUBO, MVT::i32, Custom);
801
802 // i64 operation support.
803 setOperationAction(ISD::MUL, MVT::i64, Expand);
804 setOperationAction(ISD::MULHU, MVT::i32, Expand);
805 if (Subtarget->isThumb1Only()) {
806 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
807 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
808 }
809 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
810 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
811 setOperationAction(ISD::MULHS, MVT::i32, Expand);
812
813 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
814 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
815 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
816 setOperationAction(ISD::SRL, MVT::i64, Custom);
817 setOperationAction(ISD::SRA, MVT::i64, Custom);
818 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
819
820 setOperationAction(ISD::ADDC, MVT::i32, Custom);
821 setOperationAction(ISD::ADDE, MVT::i32, Custom);
822 setOperationAction(ISD::SUBC, MVT::i32, Custom);
823 setOperationAction(ISD::SUBE, MVT::i32, Custom);
824
825 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
826 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
827
828 // ARM does not have ROTL.
829 setOperationAction(ISD::ROTL, MVT::i32, Expand);
830 for (MVT VT : MVT::vector_valuetypes()) {
831 setOperationAction(ISD::ROTL, VT, Expand);
832 setOperationAction(ISD::ROTR, VT, Expand);
833 }
834 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
835 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
836 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
837 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
838
839 // @llvm.readcyclecounter requires the Performance Monitors extension.
840 // Default to the 0 expansion on unsupported platforms.
841 // FIXME: Technically there are older ARM CPUs that have
842 // implementation-specific ways of obtaining this information.
843 if (Subtarget->hasPerfMon())
844 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
845
846 // Only ARMv6 has BSWAP.
847 if (!Subtarget->hasV6Ops())
848 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
849
850 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
851 : Subtarget->hasDivideInARMMode();
852 if (!hasDivide) {
853 // These are expanded into libcalls if the cpu doesn't have HW divider.
854 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
855 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
856 }
857
858 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
859 setOperationAction(ISD::SDIV, MVT::i32, Custom);
860 setOperationAction(ISD::UDIV, MVT::i32, Custom);
861
862 setOperationAction(ISD::SDIV, MVT::i64, Custom);
863 setOperationAction(ISD::UDIV, MVT::i64, Custom);
864 }
865
866 setOperationAction(ISD::SREM, MVT::i32, Expand);
867 setOperationAction(ISD::UREM, MVT::i32, Expand);
868
869 // Register based DivRem for AEABI (RTABI 4.2)
870 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
871 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
872 Subtarget->isTargetWindows()) {
873 setOperationAction(ISD::SREM, MVT::i64, Custom);
874 setOperationAction(ISD::UREM, MVT::i64, Custom);
875 HasStandaloneRem = false;
876
877 if (Subtarget->isTargetWindows()) {
878 const struct {
879 const RTLIB::Libcall Op;
880 const char * const Name;
881 const CallingConv::ID CC;
882 } LibraryCalls[] = {
883 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
884 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
885 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
886 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
887
888 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
889 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
890 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
891 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
892 };
893
894 for (const auto &LC : LibraryCalls) {
895 setLibcallName(LC.Op, LC.Name);
896 setLibcallCallingConv(LC.Op, LC.CC);
897 }
898 } else {
899 const struct {
900 const RTLIB::Libcall Op;
901 const char * const Name;
902 const CallingConv::ID CC;
903 } LibraryCalls[] = {
904 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
905 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
906 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
907 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
908
909 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
910 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
911 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
912 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
913 };
914
915 for (const auto &LC : LibraryCalls) {
916 setLibcallName(LC.Op, LC.Name);
917 setLibcallCallingConv(LC.Op, LC.CC);
918 }
919 }
920
921 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
922 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
923 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
924 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
925 } else {
926 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
927 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
928 }
929
930 if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
931 for (auto &VT : {MVT::f32, MVT::f64})
932 setOperationAction(ISD::FPOWI, VT, Custom);
933
934 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
935 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
936 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
937 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
938
939 setOperationAction(ISD::TRAP, MVT::Other, Legal);
940
941 // Use the default implementation.
942 setOperationAction(ISD::VASTART, MVT::Other, Custom);
943 setOperationAction(ISD::VAARG, MVT::Other, Expand);
944 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
945 setOperationAction(ISD::VAEND, MVT::Other, Expand);
946 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
947 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
948
949 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
950 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
951 else
952 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
953
954 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
955 // the default expansion.
956 InsertFencesForAtomic = false;
957 if (Subtarget->hasAnyDataBarrier() &&
958 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
959 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
960 // to ldrex/strex loops already.
961 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
962 if (!Subtarget->isThumb() || !Subtarget->isMClass())
963 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
964
965 // On v8, we have particularly efficient implementations of atomic fences
966 // if they can be combined with nearby atomic loads and stores.
967 if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
968 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
969 InsertFencesForAtomic = true;
970 }
971 } else {
972 // If there's anything we can use as a barrier, go through custom lowering
973 // for ATOMIC_FENCE.
974 // If target has DMB in thumb, Fences can be inserted.
975 if (Subtarget->hasDataBarrier())
976 InsertFencesForAtomic = true;
977
978 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
979 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
980
981 // Set them all for expansion, which will force libcalls.
982 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
983 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
984 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
985 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
986 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
987 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
988 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
989 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
990 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
991 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
992 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
993 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
994 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
995 // Unordered/Monotonic case.
996 if (!InsertFencesForAtomic) {
997 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
998 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
999 }
1000 }
1001
1002 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1003
1004 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1005 if (!Subtarget->hasV6Ops()) {
1006 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1007 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1008 }
1009 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1010
1011 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1012 !Subtarget->isThumb1Only()) {
1013 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1014 // iff target supports vfp2.
1015 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1016 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1017 }
1018
1019 // We want to custom lower some of our intrinsics.
1020 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1021 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1022 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1023 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1024 if (Subtarget->useSjLjEH())
1025 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1026
1027 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1028 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1029 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1030 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1031 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1032 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1033 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1034 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1035 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1036
1037 // Thumb-1 cannot currently select ARMISD::SUBE.
1038 if (!Subtarget->isThumb1Only())
1039 setOperationAction(ISD::SETCCE, MVT::i32, Custom);
1040
1041 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
1042 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1043 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1044 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1045 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1046
1047 // We don't support sin/cos/fmod/copysign/pow
1048 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1049 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1050 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1051 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1052 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1053 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1054 setOperationAction(ISD::FREM, MVT::f64, Expand);
1055 setOperationAction(ISD::FREM, MVT::f32, Expand);
1056 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1057 !Subtarget->isThumb1Only()) {
1058 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1059 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1060 }
1061 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1062 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1063
1064 if (!Subtarget->hasVFP4()) {
1065 setOperationAction(ISD::FMA, MVT::f64, Expand);
1066 setOperationAction(ISD::FMA, MVT::f32, Expand);
1067 }
1068
1069 // Various VFP goodness
1070 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1071 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1072 if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1073 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1074 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1075 }
1076
1077 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1078 if (!Subtarget->hasFP16()) {
1079 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1080 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1081 }
1082 }
1083
1084 // Combine sin / cos into one node or libcall if possible.
1085 if (Subtarget->hasSinCos()) {
1086 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1087 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1088 if (Subtarget->isTargetWatchABI()) {
1089 setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
1090 setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
1091 }
1092 if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1093      // For iOS, we don't want the normal expansion of a libcall to
1094 // sincos. We want to issue a libcall to __sincos_stret.
1095 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1096 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1097 }
1098 }
1099
1100 // FP-ARMv8 implements a lot of rounding-like FP operations.
1101 if (Subtarget->hasFPARMv8()) {
1102 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1103 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1104 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1105 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1106 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1107 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1108 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1109 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1110 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1111 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1112 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1113 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1114
1115 if (!Subtarget->isFPOnlySP()) {
1116 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1117 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1118 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1119 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1120 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1121 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1122 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1123 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1124 }
1125 }
1126
1127 if (Subtarget->hasNEON()) {
1128 // vmin and vmax aren't available in a scalar form, so we use
1129 // a NEON instruction with an undef lane instead.
1130 setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
1131 setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
1132 setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
1133 setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
1134 setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
1135 setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
1136 }
1137
1138 // We have target-specific dag combine patterns for the following nodes:
1139 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1140 setTargetDAGCombine(ISD::ADD);
1141 setTargetDAGCombine(ISD::SUB);
1142 setTargetDAGCombine(ISD::MUL);
1143 setTargetDAGCombine(ISD::AND);
1144 setTargetDAGCombine(ISD::OR);
1145 setTargetDAGCombine(ISD::XOR);
1146
1147 if (Subtarget->hasV6Ops())
1148 setTargetDAGCombine(ISD::SRL);
1149
1150 setStackPointerRegisterToSaveRestore(ARM::SP);
1151
1152 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1153 !Subtarget->hasVFP2())
1154 setSchedulingPreference(Sched::RegPressure);
1155 else
1156 setSchedulingPreference(Sched::Hybrid);
1157
1158 //// temporary - rewrite interface to use type
1159 MaxStoresPerMemset = 8;
1160 MaxStoresPerMemsetOptSize = 4;
1161 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1162 MaxStoresPerMemcpyOptSize = 2;
1163 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1164 MaxStoresPerMemmoveOptSize = 2;
1165
1166 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1167 // are at least 4 bytes aligned.
1168 setMinStackArgumentAlignment(4);
1169
1170 // Prefer likely predicted branches to selects on out-of-order cores.
1171 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1172
1173 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1174}
1175
1176bool ARMTargetLowering::useSoftFloat() const {
1177 return Subtarget->useSoftFloat();
1178}
1179
1180// FIXME: It might make sense to define the representative register class as the
1181// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1182// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1183// SPR's representative would be DPR_VFP2. This should work well if register
1184// pressure tracking were modified such that a register use would increment the
1185// pressure of the register class's representative and all of its super
1186// classes' representatives transitively. We have not implemented this because
1187// of the difficulty prior to coalescing of modeling operand register classes
1188// due to the common occurrence of cross class copies and subregister insertions
1189// and extractions.
1190std::pair<const TargetRegisterClass *, uint8_t>
1191ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1192 MVT VT) const {
1193 const TargetRegisterClass *RRC = nullptr;
1194 uint8_t Cost = 1;
1195 switch (VT.SimpleTy) {
1196 default:
1197 return TargetLowering::findRepresentativeClass(TRI, VT);
1198 // Use DPR as representative register class for all floating point
1199  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1200 // the cost is 1 for both f32 and f64.
1201 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1202 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1203 RRC = &ARM::DPRRegClass;
1204 // When NEON is used for SP, only half of the register file is available
1205 // because operations that define both SP and DP results will be constrained
1206 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1207 // coalescing by double-counting the SP regs. See the FIXME above.
1208 if (Subtarget->useNEONForSinglePrecisionFP())
1209 Cost = 2;
1210 break;
1211 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1212 case MVT::v4f32: case MVT::v2f64:
1213 RRC = &ARM::DPRRegClass;
1214 Cost = 2;
1215 break;
1216 case MVT::v4i64:
1217 RRC = &ARM::DPRRegClass;
1218 Cost = 4;
1219 break;
1220 case MVT::v8i64:
1221 RRC = &ARM::DPRRegClass;
1222 Cost = 8;
1223 break;
1224 }
1225 return std::make_pair(RRC, Cost);
1226}
1227
1228const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1229 switch ((ARMISD::NodeType)Opcode) {
1230 case ARMISD::FIRST_NUMBER: break;
1231 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1232 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1233 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1234 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1235 case ARMISD::CALL: return "ARMISD::CALL";
1236 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1237 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1238 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1239 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1240 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1241 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1242 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1243 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1244 case ARMISD::CMP: return "ARMISD::CMP";
1245 case ARMISD::CMN: return "ARMISD::CMN";
1246 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1247 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1248 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1249 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1250 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1251
1252 case ARMISD::CMOV: return "ARMISD::CMOV";
1253
1254 case ARMISD::SSAT: return "ARMISD::SSAT";
1255
1256 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1257 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1258 case ARMISD::RRX: return "ARMISD::RRX";
1259
1260 case ARMISD::ADDC: return "ARMISD::ADDC";
1261 case ARMISD::ADDE: return "ARMISD::ADDE";
1262 case ARMISD::SUBC: return "ARMISD::SUBC";
1263 case ARMISD::SUBE: return "ARMISD::SUBE";
1264
1265 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1266 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1267
1268 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1269 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1270 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1271
1272 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1273
1274 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1275
1276 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1277
1278 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1279
1280 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1281
1282 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1283 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1284
1285 case ARMISD::VCEQ: return "ARMISD::VCEQ";
1286 case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1287 case ARMISD::VCGE: return "ARMISD::VCGE";
1288 case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1289 case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1290 case ARMISD::VCGEU: return "ARMISD::VCGEU";
1291 case ARMISD::VCGT: return "ARMISD::VCGT";
1292 case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1293 case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1294 case ARMISD::VCGTU: return "ARMISD::VCGTU";
1295 case ARMISD::VTST: return "ARMISD::VTST";
1296
1297 case ARMISD::VSHL: return "ARMISD::VSHL";
1298 case ARMISD::VSHRs: return "ARMISD::VSHRs";
1299 case ARMISD::VSHRu: return "ARMISD::VSHRu";
1300 case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1301 case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1302 case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1303 case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1304 case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1305 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1306 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1307 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1308 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1309 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1310 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1311 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1312 case ARMISD::VSLI: return "ARMISD::VSLI";
1313 case ARMISD::VSRI: return "ARMISD::VSRI";
1314 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1315 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1316 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1317 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1318 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1319 case ARMISD::VDUP: return "ARMISD::VDUP";
1320 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1321 case ARMISD::VEXT: return "ARMISD::VEXT";
1322 case ARMISD::VREV64: return "ARMISD::VREV64";
1323 case ARMISD::VREV32: return "ARMISD::VREV32";
1324 case ARMISD::VREV16: return "ARMISD::VREV16";
1325 case ARMISD::VZIP: return "ARMISD::VZIP";
1326 case ARMISD::VUZP: return "ARMISD::VUZP";
1327 case ARMISD::VTRN: return "ARMISD::VTRN";
1328 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1329 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1330 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1331 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1332 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1333 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1334 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1335 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1336 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1337 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1338 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1339 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1340 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1341 case ARMISD::SMLALD: return "ARMISD::SMLALD";
1342 case ARMISD::SMLALDX: return "ARMISD::SMLALDX";
1343 case ARMISD::SMLSLD: return "ARMISD::SMLSLD";
1344 case ARMISD::SMLSLDX: return "ARMISD::SMLSLDX";
1345 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1346 case ARMISD::BFI: return "ARMISD::BFI";
1347 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1348 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1349 case ARMISD::VBSL: return "ARMISD::VBSL";
1350 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1351 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1352 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1353 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1354 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1355 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1356 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1357 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1358 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1359 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1360 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1361 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1362 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1363 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1364 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1365 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1366 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1367 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1368 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1369 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1370 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1371 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1372 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1373 }
1374 return nullptr;
1375}
1376
1377EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1378 EVT VT) const {
1379 if (!VT.isVector())
1380 return getPointerTy(DL);
1381 return VT.changeVectorElementTypeToInteger();
1382}
1383
1384/// getRegClassFor - Return the register class that should be used for the
1385/// specified value type.
1386const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1387 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1388 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1389 // load / store 4 to 8 consecutive D registers.
1390 if (Subtarget->hasNEON()) {
1391 if (VT == MVT::v4i64)
1392 return &ARM::QQPRRegClass;
1393 if (VT == MVT::v8i64)
1394 return &ARM::QQQQPRRegClass;
1395 }
1396 return TargetLowering::getRegClassFor(VT);
1397}
1398
1399// memcpy and other memory intrinsics typically try to use LDM/STM if the
1400// source/dest is aligned and the copy size is large enough. We therefore want
1401// to align such objects passed to memory intrinsics.
1402bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1403 unsigned &PrefAlign) const {
1404 if (!isa<MemIntrinsic>(CI))
1405 return false;
1406 MinSize = 8;
1407 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1408 // cycle faster than 4-byte aligned LDM.
1409 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1410 return true;
1411}
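// Illustrative usage sketch (not part of this file): the practical effect of
// the hook above is that a sufficiently large object reaching a memory
// intrinsic may be given 8-byte alignment on ARMv6+ (non-M-class) cores, so
// the expanded copy can use LDM/STM. "Packet" and "broadcast" are hypothetical.
#include <cstring>

struct Packet { char Bytes[32]; };

void broadcast(Packet *Dst, const Packet &Src) {
  Packet Local = Src;                  // local copy; its alignment can be raised
  // With MinSize = 8 and PrefAlign = 8, the local passed to this memcpy is a
  // candidate for 8-byte stack alignment.
  std::memcpy(Dst, &Local, sizeof(Packet));
}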
1412
1413// Create a fast isel object.
1414FastISel *
1415ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1416 const TargetLibraryInfo *libInfo) const {
1417 return ARM::createFastISel(funcInfo, libInfo);
1418}
1419
1420Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1421 unsigned NumVals = N->getNumValues();
1422 if (!NumVals)
1423 return Sched::RegPressure;
1424
1425 for (unsigned i = 0; i != NumVals; ++i) {
1426 EVT VT = N->getValueType(i);
1427 if (VT == MVT::Glue || VT == MVT::Other)
1428 continue;
1429 if (VT.isFloatingPoint() || VT.isVector())
1430 return Sched::ILP;
1431 }
1432
1433 if (!N->isMachineOpcode())
1434 return Sched::RegPressure;
1435
1436 // Loads are scheduled for latency even if the instruction itinerary
1437 // is not available.
1438 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1439 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1440
1441 if (MCID.getNumDefs() == 0)
1442 return Sched::RegPressure;
1443 if (!Itins->isEmpty() &&
1444 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1445 return Sched::ILP;
1446
1447 return Sched::RegPressure;
1448}
1449
1450//===----------------------------------------------------------------------===//
1451// Lowering Code
1452//===----------------------------------------------------------------------===//
1453
1454static bool isSRL16(const SDValue &Op) {
1455 if (Op.getOpcode() != ISD::SRL)
1456 return false;
1457 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1458 return Const->getZExtValue() == 16;
1459 return false;
1460}
1461
1462static bool isSRA16(const SDValue &Op) {
1463 if (Op.getOpcode() != ISD::SRA)
1464 return false;
1465 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1466 return Const->getZExtValue() == 16;
1467 return false;
1468}
1469
1470static bool isSHL16(const SDValue &Op) {
1471 if (Op.getOpcode() != ISD::SHL)
1472 return false;
1473 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1474 return Const->getZExtValue() == 16;
1475 return false;
1476}
1477
1478// Check for a signed 16-bit value. We special case SRA because it makes it
1479// simpler when also looking for SRAs that aren't sign extending a
1480// smaller value. Without the check, we'd need to take extra care with
1481// checking order for some operations.
1482static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1483 if (isSRA16(Op))
1484 return isSHL16(Op.getOperand(0));
1485 return DAG.ComputeNumSignBits(Op) == 17;
1486}
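// Illustrative standalone sketch (not part of this file): why "17 sign bits"
// identifies a signed 16-bit value in an i32. A value fits in [-32768, 32767]
// exactly when its top 17 bits are all copies of the sign bit; for constants
// the count below is exact, while ComputeNumSignBits reports a conservative
// lower bound for arbitrary DAG nodes.
#include <cstdint>

static unsigned numSignBits(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  uint32_t Sign = U >> 31;
  unsigned N = 1;                       // the sign bit itself always counts
  while (N < 32 && ((U >> (31 - N)) & 1) == Sign)
    ++N;
  return N;
}

static bool fitsInS16(int32_t V) {
  return numSignBits(V) >= 17;          // 0x00007FFF -> 17, 0x00008000 -> 16
}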
1487
1488/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1489static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1490 switch (CC) {
1491 default: llvm_unreachable("Unknown condition code!");
1492 case ISD::SETNE: return ARMCC::NE;
1493 case ISD::SETEQ: return ARMCC::EQ;
1494 case ISD::SETGT: return ARMCC::GT;
1495 case ISD::SETGE: return ARMCC::GE;
1496 case ISD::SETLT: return ARMCC::LT;
1497 case ISD::SETLE: return ARMCC::LE;
1498 case ISD::SETUGT: return ARMCC::HI;
1499 case ISD::SETUGE: return ARMCC::HS;
1500 case ISD::SETULT: return ARMCC::LO;
1501 case ISD::SETULE: return ARMCC::LS;
1502 }
1503}
1504
1505/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1506static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1507 ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1508 CondCode2 = ARMCC::AL;
1509 InvalidOnQNaN = true;
1510 switch (CC) {
1511 default: llvm_unreachable("Unknown FP condition!");
1512 case ISD::SETEQ:
1513 case ISD::SETOEQ:
1514 CondCode = ARMCC::EQ;
1515 InvalidOnQNaN = false;
1516 break;
1517 case ISD::SETGT:
1518 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1519 case ISD::SETGE:
1520 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1521 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1522 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1523 case ISD::SETONE:
1524 CondCode = ARMCC::MI;
1525 CondCode2 = ARMCC::GT;
1526 InvalidOnQNaN = false;
1527 break;
1528 case ISD::SETO: CondCode = ARMCC::VC; break;
1529 case ISD::SETUO: CondCode = ARMCC::VS; break;
1530 case ISD::SETUEQ:
1531 CondCode = ARMCC::EQ;
1532 CondCode2 = ARMCC::VS;
1533 InvalidOnQNaN = false;
1534 break;
1535 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1536 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1537 case ISD::SETLT:
1538 case ISD::SETULT: CondCode = ARMCC::LT; break;
1539 case ISD::SETLE:
1540 case ISD::SETULE: CondCode = ARMCC::LE; break;
1541 case ISD::SETNE:
1542 case ISD::SETUNE:
1543 CondCode = ARMCC::NE;
1544 InvalidOnQNaN = false;
1545 break;
1546 }
1547}
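// Illustrative standalone check (not part of this file): the semantics the
// two-condition lowering of ISD::SETUEQ has to preserve. After the FP compare
// sets the flags, ARMCC::EQ covers the ordered-equal case and ARMCC::VS covers
// "unordered" (either operand is a NaN), so "ueq" holds iff either one holds.
#include <cassert>
#include <cmath>

static bool setueq(double A, double B) {           // ISD::SETUEQ
  bool Unordered = std::isnan(A) || std::isnan(B); // would test ARMCC::VS
  bool OrderedEq = A == B;                         // would test ARMCC::EQ
  return OrderedEq || Unordered;
}

int main() {
  assert(setueq(1.0, 1.0));            // EQ branch taken
  assert(setueq(std::nan(""), 1.0));   // VS branch taken
  assert(!setueq(1.0, 2.0));           // neither condition holds
  return 0;
}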
1548
1549//===----------------------------------------------------------------------===//
1550// Calling Convention Implementation
1551//===----------------------------------------------------------------------===//
1552
1553#include "ARMGenCallingConv.inc"
1554
1555/// getEffectiveCallingConv - Get the effective calling convention, taking into
1556/// account presence of floating point hardware and calling convention
1557/// limitations, such as support for variadic functions.
1558CallingConv::ID
1559ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1560 bool isVarArg) const {
1561 switch (CC) {
1562 default:
1563 report_fatal_error("Unsupported calling convention");
1564 case CallingConv::ARM_AAPCS:
1565 case CallingConv::ARM_APCS:
1566 case CallingConv::GHC:
1567 return CC;
1568 case CallingConv::PreserveMost:
1569 return CallingConv::PreserveMost;
1570 case CallingConv::ARM_AAPCS_VFP:
1571 case CallingConv::Swift:
1572 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1573 case CallingConv::C:
1574 if (!Subtarget->isAAPCS_ABI())
1575 return CallingConv::ARM_APCS;
1576 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1577 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1578 !isVarArg)
1579 return CallingConv::ARM_AAPCS_VFP;
1580 else
1581 return CallingConv::ARM_AAPCS;
1582 case CallingConv::Fast:
1583 case CallingConv::CXX_FAST_TLS:
1584 if (!Subtarget->isAAPCS_ABI()) {
1585 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1586 return CallingConv::Fast;
1587 return CallingConv::ARM_APCS;
1588 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1589 return CallingConv::ARM_AAPCS_VFP;
1590 else
1591 return CallingConv::ARM_AAPCS;
1592 }
1593}
1594
1595CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1596 bool isVarArg) const {
1597 return CCAssignFnForNode(CC, false, isVarArg);
1598}
1599
1600CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1601 bool isVarArg) const {
1602 return CCAssignFnForNode(CC, true, isVarArg);
1603}
1604
1605/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1606/// CallingConvention.
1607CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1608 bool Return,
1609 bool isVarArg) const {
1610 switch (getEffectiveCallingConv(CC, isVarArg)) {
1611 default:
1612 report_fatal_error("Unsupported calling convention");
1613 case CallingConv::ARM_APCS:
1614 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1615 case CallingConv::ARM_AAPCS:
1616 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1617 case CallingConv::ARM_AAPCS_VFP:
1618 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1619 case CallingConv::Fast:
1620 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1621 case CallingConv::GHC:
1622 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1623 case CallingConv::PreserveMost:
1624 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1625 }
1626}
1627
1628/// LowerCallResult - Lower the result values of a call into the
1629/// appropriate copies out of appropriate physical registers.
1630SDValue ARMTargetLowering::LowerCallResult(
1631 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1632 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1633 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1634 SDValue ThisVal) const {
1635 // Assign locations to each value returned by this call.
1636 SmallVector<CCValAssign, 16> RVLocs;
1637 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1638 *DAG.getContext());
1639 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1640
1641 // Copy all of the result registers out of their specified physreg.
1642 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1643 CCValAssign VA = RVLocs[i];
1644
1645 // Pass 'this' value directly from the argument to return value, to avoid
1646 // reg unit interference
1647 if (i == 0 && isThisReturn) {
1648 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1649 "unexpected return calling convention register assignment");
1650 InVals.push_back(ThisVal);
1651 continue;
1652 }
1653
1654 SDValue Val;
1655 if (VA.needsCustom()) {
1656 // Handle f64 or half of a v2f64.
1657 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1658 InFlag);
1659 Chain = Lo.getValue(1);
1660 InFlag = Lo.getValue(2);
1661 VA = RVLocs[++i]; // skip ahead to next loc
1662 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1663 InFlag);
1664 Chain = Hi.getValue(1);
1665 InFlag = Hi.getValue(2);
1666 if (!Subtarget->isLittle())
1667 std::swap (Lo, Hi);
1668 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1669
1670 if (VA.getLocVT() == MVT::v2f64) {
1671 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1672 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1673 DAG.getConstant(0, dl, MVT::i32));
1674
1675 VA = RVLocs[++i]; // skip ahead to next loc
1676 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1677 Chain = Lo.getValue(1);
1678 InFlag = Lo.getValue(2);
1679 VA = RVLocs[++i]; // skip ahead to next loc
1680 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1681 Chain = Hi.getValue(1);
1682 InFlag = Hi.getValue(2);
1683 if (!Subtarget->isLittle())
1684 std::swap (Lo, Hi);
1685 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1686 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1687 DAG.getConstant(1, dl, MVT::i32));
1688 }
1689 } else {
1690 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1691 InFlag);
1692 Chain = Val.getValue(1);
1693 InFlag = Val.getValue(2);
1694 }
1695
1696 switch (VA.getLocInfo()) {
1697 default: llvm_unreachable("Unknown loc info!");
1698 case CCValAssign::Full: break;
1699 case CCValAssign::BCvt:
1700 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1701 break;
1702 }
1703
1704 InVals.push_back(Val);
1705 }
1706
1707 return Chain;
1708}
1709
1710/// LowerMemOpCallTo - Store the argument to the stack.
1711SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1712 SDValue Arg, const SDLoc &dl,
1713 SelectionDAG &DAG,
1714 const CCValAssign &VA,
1715 ISD::ArgFlagsTy Flags) const {
1716 unsigned LocMemOffset = VA.getLocMemOffset();
1717 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1718 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1719 StackPtr, PtrOff);
1720 return DAG.getStore(
1721 Chain, dl, Arg, PtrOff,
1722 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1723}
1724
1725void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1726 SDValue Chain, SDValue &Arg,
1727 RegsToPassVector &RegsToPass,
1728 CCValAssign &VA, CCValAssign &NextVA,
1729 SDValue &StackPtr,
1730 SmallVectorImpl<SDValue> &MemOpChains,
1731 ISD::ArgFlagsTy Flags) const {
1732 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1733 DAG.getVTList(MVT::i32, MVT::i32), Arg);
1734 unsigned id = Subtarget->isLittle() ? 0 : 1;
1735 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1736
1737 if (NextVA.isRegLoc())
1738 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1739 else {
1740 assert(NextVA.isMemLoc());
1741 if (!StackPtr.getNode())
1742 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1743 getPointerTy(DAG.getDataLayout()));
1744
1745 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1746 dl, DAG, NextVA,
1747 Flags));
1748 }
1749}
1750
1751/// LowerCall - Lowering a call into a callseq_start <-
1752/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1753/// nodes.
1754SDValue
1755ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1756 SmallVectorImpl<SDValue> &InVals) const {
1757 SelectionDAG &DAG = CLI.DAG;
1758 SDLoc &dl = CLI.DL;
1759 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1760 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1761 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1762 SDValue Chain = CLI.Chain;
1763 SDValue Callee = CLI.Callee;
1764 bool &isTailCall = CLI.IsTailCall;
1765 CallingConv::ID CallConv = CLI.CallConv;
1766 bool doesNotRet = CLI.DoesNotReturn;
1767 bool isVarArg = CLI.IsVarArg;
1768
1769 MachineFunction &MF = DAG.getMachineFunction();
1770 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1771 bool isThisReturn = false;
1772 bool isSibCall = false;
1773 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1774
1775 // Disable tail calls if they're not supported.
1776 if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1777 isTailCall = false;
1778
1779 if (isTailCall) {
1780 // Check if it's really possible to do a tail call.
1781 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1782 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1783 Outs, OutVals, Ins, DAG);
1784 if (!isTailCall && CLI.CS && CLI.CS.isMustTailCall())
1785 report_fatal_error("failed to perform tail call elimination on a call "
1786 "site marked musttail");
1787 // We don't support GuaranteedTailCallOpt for ARM, only automatically
1788 // detected sibcalls.
1789 if (isTailCall) {
1790 ++NumTailCalls;
1791 isSibCall = true;
1792 }
1793 }
1794
1795 // Analyze operands of the call, assigning locations to each operand.
1796 SmallVector<CCValAssign, 16> ArgLocs;
1797 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1798 *DAG.getContext());
1799 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1800
1801 // Get a count of how many bytes are to be pushed on the stack.
1802 unsigned NumBytes = CCInfo.getNextStackOffset();
1803
1804 // For tail calls, memory operands are available in our caller's stack.
1805 if (isSibCall)
1806 NumBytes = 0;
1807
1808 // Adjust the stack pointer for the new arguments...
1809 // These operations are automatically eliminated by the prolog/epilog pass
1810 if (!isSibCall)
1811 Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
1812
1813 SDValue StackPtr =
1814 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1815
1816 RegsToPassVector RegsToPass;
1817 SmallVector<SDValue, 8> MemOpChains;
1818
1819 // Walk the register/memloc assignments, inserting copies/loads. In the case
1820 // of tail call optimization, arguments are handled later.
1821 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1822 i != e;
1823 ++i, ++realArgIdx) {
1824 CCValAssign &VA = ArgLocs[i];
1825 SDValue Arg = OutVals[realArgIdx];
1826 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1827 bool isByVal = Flags.isByVal();
1828
1829 // Promote the value if needed.
1830 switch (VA.getLocInfo()) {
1831 default: llvm_unreachable("Unknown loc info!");
1832 case CCValAssign::Full: break;
1833 case CCValAssign::SExt:
1834 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1835 break;
1836 case CCValAssign::ZExt:
1837 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1838 break;
1839 case CCValAssign::AExt:
1840 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1841 break;
1842 case CCValAssign::BCvt:
1843 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1844 break;
1845 }
1846
1847 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1848 if (VA.needsCustom()) {
1849 if (VA.getLocVT() == MVT::v2f64) {
1850 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1851 DAG.getConstant(0, dl, MVT::i32));
1852 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1853 DAG.getConstant(1, dl, MVT::i32));
1854
1855 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1856 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1857
1858 VA = ArgLocs[++i]; // skip ahead to next loc
1859 if (VA.isRegLoc()) {
1860 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1861 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1862 } else {
1863 assert(VA.isMemLoc());
1864
1865 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1866 dl, DAG, VA, Flags));
1867 }
1868 } else {
1869 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1870 StackPtr, MemOpChains, Flags);
1871 }
1872 } else if (VA.isRegLoc()) {
1873 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1874 Outs[0].VT == MVT::i32) {
1875 assert(VA.getLocVT() == MVT::i32 &&
1876 "unexpected calling convention register assignment");
1877 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1878 "unexpected use of 'returned'");
1879 isThisReturn = true;
1880 }
1881 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1882 } else if (isByVal) {
1883 assert(VA.isMemLoc());
1884 unsigned offset = 0;
1885
1886 // True if this byval aggregate will be split between registers
1887 // and memory.
1888 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1889 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1890
1891 if (CurByValIdx < ByValArgsCount) {
1892
1893 unsigned RegBegin, RegEnd;
1894 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1895
1896 EVT PtrVT =
1897 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1898 unsigned int i, j;
1899 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1900 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1901 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1902 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1903 MachinePointerInfo(),
1904 DAG.InferPtrAlignment(AddArg));
1905 MemOpChains.push_back(Load.getValue(1));
1906 RegsToPass.push_back(std::make_pair(j, Load));
1907 }
1908
1909 // If the parameter size exceeds the register area, the "offset" value
1910 // helps us calculate the stack slot for the remaining part properly.
1911 offset = RegEnd - RegBegin;
1912
1913 CCInfo.nextInRegsParam();
1914 }
1915
1916 if (Flags.getByValSize() > 4*offset) {
1917 auto PtrVT = getPointerTy(DAG.getDataLayout());
1918 unsigned LocMemOffset = VA.getLocMemOffset();
1919 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1920 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1921 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1922 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1923 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1924 MVT::i32);
1925 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1926 MVT::i32);
1927
1928 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1929 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1930 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1931 Ops));
1932 }
1933 } else if (!isSibCall) {
1934 assert(VA.isMemLoc());
1935
1936 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1937 dl, DAG, VA, Flags));
1938 }
1939 }
1940
1941 if (!MemOpChains.empty())
1942 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1943
1944 // Build a sequence of copy-to-reg nodes chained together with token chain
1945 // and flag operands which copy the outgoing args into the appropriate regs.
1946 SDValue InFlag;
1947 // Tail call byval lowering might overwrite argument registers, so in case of
1948 // tail call optimization the copies to registers are lowered later.
1949 if (!isTailCall)
1950 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1951 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1952 RegsToPass[i].second, InFlag);
1953 InFlag = Chain.getValue(1);
1954 }
1955
1956 // For tail calls lower the arguments to the 'real' stack slot.
1957 if (isTailCall) {
1958 // Force all the incoming stack arguments to be loaded from the stack
1959 // before any new outgoing arguments are stored to the stack, because the
1960 // outgoing stack slots may alias the incoming argument stack slots, and
1961 // the alias isn't otherwise explicit. This is slightly more conservative
1962 // than necessary, because it means that each store effectively depends
1963 // on every argument instead of just those arguments it would clobber.
1964
1965 // Do not flag preceding copytoreg stuff together with the following stuff.
1966 InFlag = SDValue();
1967 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1968 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1969 RegsToPass[i].second, InFlag);
1970 InFlag = Chain.getValue(1);
1971 }
1972 InFlag = SDValue();
1973 }
1974
1975 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1976 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1977 // node so that legalize doesn't hack it.
1978 bool isDirect = false;
1979
1980 const TargetMachine &TM = getTargetMachine();
1981 const Module *Mod = MF.getFunction()->getParent();
1982 const GlobalValue *GV = nullptr;
1983 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1984 GV = G->getGlobal();
1985 bool isStub =
1986 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1987
1988 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1989 bool isLocalARMFunc = false;
1990 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1991 auto PtrVt = getPointerTy(DAG.getDataLayout());
1992
1993 if (Subtarget->genLongCalls()) {
1994 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1995 "long-calls codegen is not position independent!");
1996 // Handle a global address or an external symbol. If it's not one of
1997 // those, the target's already in a register, so we don't need to do
1998 // anything extra.
1999 if (isa<GlobalAddressSDNode>(Callee)) {
2000 // Create a constant pool entry for the callee address
2001 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2002 ARMConstantPoolValue *CPV =
2003 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2004
2005 // Get the address of the callee into a register
2006 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2007 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2008 Callee = DAG.getLoad(
2009 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2010 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2011 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2012 const char *Sym = S->getSymbol();
2013
2014 // Create a constant pool entry for the callee address
2015 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2016 ARMConstantPoolValue *CPV =
2017 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2018 ARMPCLabelIndex, 0);
2019 // Get the address of the callee into a register
2020 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2021 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2022 Callee = DAG.getLoad(
2023 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2024 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2025 }
2026 } else if (isa<GlobalAddressSDNode>(Callee)) {
2027 // If we're optimizing for minimum size and the function is called three or
2028 // more times in this block, we can improve codesize by calling indirectly
2029 // as BLXr has a 16-bit encoding.
2030 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2031 auto *BB = CLI.CS.getParent();
2032 bool PreferIndirect =
2033 Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2034 count_if(GV->users(), [&BB](const User *U) {
2035 return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2036 }) > 2;
2037
2038 if (!PreferIndirect) {
2039 isDirect = true;
2040 bool isDef = GV->isStrongDefinitionForLinker();
2041
2042 // ARM call to a local ARM function is predicable.
2043 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2044 // tBX takes a register source operand.
2045 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2046 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2047 Callee = DAG.getNode(
2048 ARMISD::WrapperPIC, dl, PtrVt,
2049 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2050 Callee = DAG.getLoad(
2051 PtrVt, dl, DAG.getEntryNode(), Callee,
2052 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2053 /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2054 MachineMemOperand::MOInvariant);
2055 } else if (Subtarget->isTargetCOFF()) {
2056 assert(Subtarget->isTargetWindows() &&
2057 "Windows is the only supported COFF target");
2058 unsigned TargetFlags = GV->hasDLLImportStorageClass()
2059 ? ARMII::MO_DLLIMPORT
2060 : ARMII::MO_NO_FLAG;
2061 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2062 TargetFlags);
2063 if (GV->hasDLLImportStorageClass())
2064 Callee =
2065 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2066 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2067 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2068 } else {
2069 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2070 }
2071 }
2072 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2073 isDirect = true;
2074 // tBX takes a register source operand.
2075 const char *Sym = S->getSymbol();
2076 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2077 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2078 ARMConstantPoolValue *CPV =
2079 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2080 ARMPCLabelIndex, 4);
2081 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2082 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2083 Callee = DAG.getLoad(
2084 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2085 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2086 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2087 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2088 } else {
2089 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2090 }
2091 }
2092
2093 // FIXME: handle tail calls differently.
2094 unsigned CallOpc;
2095 if (Subtarget->isThumb()) {
2096 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2097 CallOpc = ARMISD::CALL_NOLINK;
2098 else
2099 CallOpc = ARMISD::CALL;
2100 } else {
2101 if (!isDirect && !Subtarget->hasV5TOps())
2102 CallOpc = ARMISD::CALL_NOLINK;
2103 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2104 // Emit regular call when code size is the priority
2105 !MF.getFunction()->optForMinSize())
2106 // "mov lr, pc; b _foo" to avoid confusing the RSP
2107 CallOpc = ARMISD::CALL_NOLINK;
2108 else
2109 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2110 }
2111
2112 std::vector<SDValue> Ops;
2113 Ops.push_back(Chain);
2114 Ops.push_back(Callee);
2115
2116 // Add argument registers to the end of the list so that they are known live
2117 // into the call.
2118 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2119 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2120 RegsToPass[i].second.getValueType()));
2121
2122 // Add a register mask operand representing the call-preserved registers.
2123 if (!isTailCall) {
2124 const uint32_t *Mask;
2125 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2126 if (isThisReturn) {
2127 // For 'this' returns, use the R0-preserving mask if applicable
2128 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2129 if (!Mask) {
2130 // Set isThisReturn to false if the calling convention is not one that
2131 // allows 'returned' to be modeled in this way, so LowerCallResult does
2132 // not try to pass 'this' straight through
2133 isThisReturn = false;
2134 Mask = ARI->getCallPreservedMask(MF, CallConv);
2135 }
2136 } else
2137 Mask = ARI->getCallPreservedMask(MF, CallConv);
2138
2139 assert(Mask && "Missing call preserved mask for calling convention");
2140 Ops.push_back(DAG.getRegisterMask(Mask));
2141 }
2142
2143 if (InFlag.getNode())
2144 Ops.push_back(InFlag);
2145
2146 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2147 if (isTailCall) {
2148 MF.getFrameInfo().setHasTailCall();
2149 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2150 }
2151
2152 // Returns a chain and a flag for retval copy to use.
2153 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2154 InFlag = Chain.getValue(1);
2155
2156 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2157 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2158 if (!Ins.empty())
2159 InFlag = Chain.getValue(1);
2160
2161 // Handle result values, copying them out of physregs into vregs that we
2162 // return.
2163 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2164 InVals, isThisReturn,
2165 isThisReturn ? OutVals[0] : SDValue());
2166}
2167
2168/// HandleByVal - Every parameter *after* a byval parameter is passed
2169/// on the stack. Remember the next parameter register to allocate,
2170/// and then confiscate the rest of the parameter registers to ensure
2171/// this.
2172void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2173 unsigned Align) const {
2174 // Byval (as with any stack) slots are always at least 4 byte aligned.
2175 Align = std::max(Align, 4U);
2176
2177 unsigned Reg = State->AllocateReg(GPRArgRegs);
2178 if (!Reg)
2179 return;
2180
2181 unsigned AlignInRegs = Align / 4;
2182 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2183 for (unsigned i = 0; i < Waste; ++i)
2184 Reg = State->AllocateReg(GPRArgRegs);
2185
2186 if (!Reg)
2187 return;
2188
2189 unsigned Excess = 4 * (ARM::R4 - Reg);
2190
2191 // Special case when NSAA != SP and the parameter size is greater than the
2192 // size of all remaining GPR regs. In that case we can't split the parameter;
2193 // we must send it to the stack. We also must set NCRN to R4, so waste all
2194 // remaining registers.
2195 const unsigned NSAAOffset = State->getNextStackOffset();
2196 if (NSAAOffset != 0 && Size > Excess) {
2197 while (State->AllocateReg(GPRArgRegs))
2198 ;
2199 return;
2200 }
2201
2202 // The first register for the byval parameter is the first register that
2203 // wasn't allocated before this method call, so it would be "reg".
2204 // If the parameter is small enough to be saved in the range [reg, r4), then
2205 // the end (first after last) register would be reg + param-size-in-regs;
2206 // otherwise the parameter is split between registers and stack, and the
2207 // end register would be r4 in this case.
2208 unsigned ByValRegBegin = Reg;
2209 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2210 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2211 // Note that the first register is already allocated at the beginning of the
2212 // function, so allocate the remaining registers we need.
2213 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2214 State->AllocateReg(GPRArgRegs);
2215 // A byval parameter that is split between registers and memory needs its
2216 // size truncated here.
2217 // In the case where the entire structure fits in registers, we set the
2218 // size in memory to zero.
2219 Size = std::max<int>(Size - Excess, 0);
2220}
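// Illustrative standalone sketch (not part of this file): the byval splitting
// arithmetic above, with r0-r3 numbered 0-3 and r4 = 4. "FirstReg" stands in
// for the register returned by the initial AllocateReg call; the sketch leaves
// out the NSAA != SP special case handled above.
#include <algorithm>

struct ByValSplit {
  unsigned RegBegin;  // first GPR holding part of the byval
  unsigned RegEnd;    // one past the last GPR used (never past r4)
  unsigned MemSize;   // bytes left for the stack
};

ByValSplit splitByVal(unsigned FirstReg, unsigned Size, unsigned Align) {
  Align = std::max(Align, 4u);
  unsigned AlignInRegs = Align / 4;
  unsigned Waste = (4 - FirstReg) % AlignInRegs;   // regs skipped for alignment
  unsigned Reg = FirstReg + Waste;
  unsigned Excess = 4 * (4 - Reg);                 // bytes that fit in registers
  unsigned RegEnd = std::min(Reg + Size / 4, 4u);
  unsigned MemSize = Size > Excess ? Size - Excess : 0;
  return {Reg, RegEnd, MemSize};
}

// Example: splitByVal(/*FirstReg=*/1, /*Size=*/20, /*Align=*/8) gives
// RegBegin = 2 (r2), RegEnd = 4 (r2-r3 used), MemSize = 12 bytes on the stack.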
2221
2222/// MatchingStackOffset - Return true if the given stack call argument is
2223/// already available in the same position (relatively) of the caller's
2224/// incoming argument stack.
2225static
2226bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2227 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2228 const TargetInstrInfo *TII) {
2229 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2230 int FI = std::numeric_limits<int>::max();
2231 if (Arg.getOpcode() == ISD::CopyFromReg) {
2232 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2233 if (!TargetRegisterInfo::isVirtualRegister(VR))
2234 return false;
2235 MachineInstr *Def = MRI->getVRegDef(VR);
2236 if (!Def)
2237 return false;
2238 if (!Flags.isByVal()) {
2239 if (!TII->isLoadFromStackSlot(*Def, FI))
2240 return false;
2241 } else {
2242 return false;
2243 }
2244 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2245 if (Flags.isByVal())
2246 // ByVal argument is passed in as a pointer but it's now being
2247 // dereferenced. e.g.
2248 // define @foo(%struct.X* %A) {
2249 // tail call @bar(%struct.X* byval %A)
2250 // }
2251 return false;
2252 SDValue Ptr = Ld->getBasePtr();
2253 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2254 if (!FINode)
2255 return false;
2256 FI = FINode->getIndex();
2257 } else
2258 return false;
2259
2260 assert(FI != std::numeric_limits<int>::max());
2261 if (!MFI.isFixedObjectIndex(FI))
2262 return false;
2263 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2264}
2265
2266/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2267/// for tail call optimization. Targets which want to do tail call
2268/// optimization should implement this function.
2269bool
2270ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2271 CallingConv::ID CalleeCC,
2272 bool isVarArg,
2273 bool isCalleeStructRet,
2274 bool isCallerStructRet,
2275 const SmallVectorImpl<ISD::OutputArg> &Outs,
2276 const SmallVectorImpl<SDValue> &OutVals,
2277 const SmallVectorImpl<ISD::InputArg> &Ins,
2278 SelectionDAG& DAG) const {
2279 MachineFunction &MF = DAG.getMachineFunction();
2280 const Function *CallerF = MF.getFunction();
2281 CallingConv::ID CallerCC = CallerF->getCallingConv();
2282
2283 assert(Subtarget->supportsTailCall());
2284
2285 // Look for obvious safe cases to perform tail call optimization that do not
2286 // require ABI changes. This is what gcc calls sibcall.
2287
2288 // Exception-handling functions need a special set of instructions to indicate
2289 // a return to the hardware. Tail-calling another function would probably
2290 // break this.
2291 if (CallerF->hasFnAttribute("interrupt"))
2292 return false;
2293
2294 // Also avoid sibcall optimization if either caller or callee uses struct
2295 // return semantics.
2296 if (isCalleeStructRet || isCallerStructRet)
2297 return false;
2298
2299 // Externally-defined functions with weak linkage should not be
2300 // tail-called on ARM when the OS does not support dynamic
2301 // pre-emption of symbols, as the AAELF spec requires normal calls
2302 // to undefined weak functions to be replaced with a NOP or jump to the
2303 // next instruction. The behaviour of branch instructions in this
2304 // situation (as used for tail calls) is implementation-defined, so we
2305 // cannot rely on the linker replacing the tail call with a return.
2306 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2307 const GlobalValue *GV = G->getGlobal();
2308 const Triple &TT = getTargetMachine().getTargetTriple();
2309 if (GV->hasExternalWeakLinkage() &&
2310 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2311 return false;
2312 }
2313
2314 // Check that the call results are passed in the same way.
2315 LLVMContext &C = *DAG.getContext();
2316 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2317 CCAssignFnForReturn(CalleeCC, isVarArg),
2318 CCAssignFnForReturn(CallerCC, isVarArg)))
2319 return false;
2320 // The callee has to preserve all registers the caller needs to preserve.
2321 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2322 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2323 if (CalleeCC != CallerCC) {
2324 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2325 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2326 return false;
2327 }
2328
2329 // If Caller's vararg or byval argument has been split between registers and
2330 // stack, do not perform a tail call, since part of the argument is in the caller's
2331 // local frame.
2332 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2333 if (AFI_Caller->getArgRegsSaveSize())
2334 return false;
2335
2336 // If the callee takes no arguments then go on to check the results of the
2337 // call.
2338 if (!Outs.empty()) {
2339 // Check if stack adjustment is needed. For now, do not do this if any
2340 // argument is passed on the stack.
2341 SmallVector<CCValAssign, 16> ArgLocs;
2342 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2343 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2344 if (CCInfo.getNextStackOffset()) {
2345 // Check if the arguments are already laid out in the right way as
2346 // the caller's fixed stack objects.
2347 MachineFrameInfo &MFI = MF.getFrameInfo();
2348 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2349 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2350 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2351 i != e;
2352 ++i, ++realArgIdx) {
2353 CCValAssign &VA = ArgLocs[i];
2354 EVT RegVT = VA.getLocVT();
2355 SDValue Arg = OutVals[realArgIdx];
2356 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2357 if (VA.getLocInfo() == CCValAssign::Indirect)
2358 return false;
2359 if (VA.needsCustom()) {
2360 // f64 and vector types are split into multiple registers or
2361 // register/stack-slot combinations. The types will not match
2362 // the registers; give up on memory f64 refs until we figure
2363 // out what to do about this.
2364 if (!VA.isRegLoc())
2365 return false;
2366 if (!ArgLocs[++i].isRegLoc())
2367 return false;
2368 if (RegVT == MVT::v2f64) {
2369 if (!ArgLocs[++i].isRegLoc())
2370 return false;
2371 if (!ArgLocs[++i].isRegLoc())
2372 return false;
2373 }
2374 } else if (!VA.isRegLoc()) {
2375 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2376 MFI, MRI, TII))
2377 return false;
2378 }
2379 }
2380 }
2381
2382 const MachineRegisterInfo &MRI = MF.getRegInfo();
2383 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2384 return false;
2385 }
2386
2387 return true;
2388}
2389
2390bool
2391ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2392 MachineFunction &MF, bool isVarArg,
2393 const SmallVectorImpl<ISD::OutputArg> &Outs,
2394 LLVMContext &Context) const {
2395 SmallVector<CCValAssign, 16> RVLocs;
2396 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2397 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2398}
2399
2400static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2401 const SDLoc &DL, SelectionDAG &DAG) {
2402 const MachineFunction &MF = DAG.getMachineFunction();
2403 const Function *F = MF.getFunction();
2404
2405 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2406
2407 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2408 // version of the "preferred return address". These offsets affect the return
2409 // instruction if this is a return from PL1 without hypervisor extensions.
2410 // IRQ/FIQ: +4 "subs pc, lr, #4"
2411 // SWI: 0 "subs pc, lr, #0"
2412 // ABORT: +4 "subs pc, lr, #4"
2413 // UNDEF: +4/+2 "subs pc, lr, #0"
2414 // UNDEF varies depending on whether the exception came from ARM or Thumb
2415 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2416
2417 int64_t LROffset;
2418 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2419 IntKind == "ABORT")
2420 LROffset = 4;
2421 else if (IntKind == "SWI" || IntKind == "UNDEF")
2422 LROffset = 0;
2423 else
2424 report_fatal_error("Unsupported interrupt attribute. If present, value "
2425 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2426
2427 RetOps.insert(RetOps.begin() + 1,
2428 DAG.getConstant(LROffset, DL, MVT::i32, false));
2429
2430 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2431}
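// Illustrative usage sketch (assumes Clang's ARM "interrupt" attribute, which
// becomes the IR function attribute checked above): an IRQ handler on a
// non-M-class core returns with "subs pc, lr, #4", i.e. the LROffset = 4 case.
__attribute__((interrupt("IRQ"))) void irq_handler(void) {
  // ... acknowledge and service the interrupt ...
}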
2432
2433SDValue
2434ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2435 bool isVarArg,
2436 const SmallVectorImpl<ISD::OutputArg> &Outs,
2437 const SmallVectorImpl<SDValue> &OutVals,
2438 const SDLoc &dl, SelectionDAG &DAG) const {
2439 // CCValAssign - represent the assignment of the return value to a location.
2440 SmallVector<CCValAssign, 16> RVLocs;
2441
2442 // CCState - Info about the registers and stack slots.
2443 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2444 *DAG.getContext());
2445
2446 // Analyze outgoing return values.
2447 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2448
2449 SDValue Flag;
2450 SmallVector<SDValue, 4> RetOps;
2451 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2452 bool isLittleEndian = Subtarget->isLittle();
2453
2454 MachineFunction &MF = DAG.getMachineFunction();
2455 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2456 AFI->setReturnRegsCount(RVLocs.size());
2457
2458 // Copy the result values into the output registers.
2459 for (unsigned i = 0, realRVLocIdx = 0;
2460 i != RVLocs.size();
2461 ++i, ++realRVLocIdx) {
2462 CCValAssign &VA = RVLocs[i];
2463 assert(VA.isRegLoc() && "Can only return in registers!");
2464
2465 SDValue Arg = OutVals[realRVLocIdx];
2466
2467 switch (VA.getLocInfo()) {
2468 default: llvm_unreachable("Unknown loc info!");
2469 case CCValAssign::Full: break;
2470 case CCValAssign::BCvt:
2471 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2472 break;
2473 }
2474
2475 if (VA.needsCustom()) {
2476 if (VA.getLocVT() == MVT::v2f64) {
2477 // Extract the first half and return it in two registers.
2478 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2479 DAG.getConstant(0, dl, MVT::i32));
2480 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2481 DAG.getVTList(MVT::i32, MVT::i32), Half);
2482
2483 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2484 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2485 Flag);
2486 Flag = Chain.getValue(1);
2487 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2488 VA = RVLocs[++i]; // skip ahead to next loc
2489 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2490 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2491 Flag);
2492 Flag = Chain.getValue(1);
2493 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2494 VA = RVLocs[++i]; // skip ahead to next loc
2495
2496 // Extract the 2nd half and fall through to handle it as an f64 value.
2497 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2498 DAG.getConstant(1, dl, MVT::i32));
2499 }
2500 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2501 // available.
2502 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2503 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2504 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2505 fmrrd.getValue(isLittleEndian ? 0 : 1),
2506 Flag);
2507 Flag = Chain.getValue(1);
2508 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2509 VA = RVLocs[++i]; // skip ahead to next loc
2510 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2511 fmrrd.getValue(isLittleEndian ? 1 : 0),
2512 Flag);
2513 } else
2514 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2515
2516 // Guarantee that all emitted copies are
2517 // stuck together, so nothing can be scheduled in between them.
2518 Flag = Chain.getValue(1);
2519 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2520 }
2521 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2522 const MCPhysReg *I =
2523 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2524 if (I) {
2525 for (; *I; ++I) {
2526 if (ARM::GPRRegClass.contains(*I))
2527 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2528 else if (ARM::DPRRegClass.contains(*I))
2529 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2530 else
2531 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2532 }
2533 }
2534
2535 // Update chain and glue.
2536 RetOps[0] = Chain;
2537 if (Flag.getNode())
2538 RetOps.push_back(Flag);
2539
2540 // CPUs which aren't M-class use a special sequence to return from
2541 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2542 // though we use "subs pc, lr, #N").
2543 //
2544 // M-class CPUs actually use a normal return sequence with a special
2545 // (hardware-provided) value in LR, so the normal code path works.
2546 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2547 !Subtarget->isMClass()) {
2548 if (Subtarget->isThumb1Only())
2549 report_fatal_error("interrupt attribute is not supported in Thumb1");
2550 return LowerInterruptReturn(RetOps, dl, DAG);
2551 }
2552
2553 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2554}
2555
2556bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2557 if (N->getNumValues() != 1)
2558 return false;
2559 if (!N->hasNUsesOfValue(1, 0))
2560 return false;
2561
2562 SDValue TCChain = Chain;
2563 SDNode *Copy = *N->use_begin();
2564 if (Copy->getOpcode() == ISD::CopyToReg) {
2565 // If the copy has a glue operand, we conservatively assume it isn't safe to
2566 // perform a tail call.
2567 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2568 return false;
2569 TCChain = Copy->getOperand(0);
2570 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2571 SDNode *VMov = Copy;
2572 // f64 returned in a pair of GPRs.
2573 SmallPtrSet<SDNode*, 2> Copies;
2574 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2575 UI != UE; ++UI) {
2576 if (UI->getOpcode() != ISD::CopyToReg)
2577 return false;
2578 Copies.insert(*UI);
2579 }
2580 if (Copies.size() > 2)
2581 return false;
2582
2583 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2584 UI != UE; ++UI) {
2585 SDValue UseChain = UI->getOperand(0);
2586 if (Copies.count(UseChain.getNode()))
2587 // Second CopyToReg
2588 Copy = *UI;
2589 else {
2590 // We are at the top of this chain.
2591 // If the copy has a glue operand, we conservatively assume it
2592 // isn't safe to perform a tail call.
2593 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2594 return false;
2595 // First CopyToReg
2596 TCChain = UseChain;
2597 }
2598 }
2599 } else if (Copy->getOpcode() == ISD::BITCAST) {
2600 // f32 returned in a single GPR.
2601 if (!Copy->hasOneUse())
2602 return false;
2603 Copy = *Copy->use_begin();
2604 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2605 return false;
2606 // If the copy has a glue operand, we conservatively assume it isn't safe to
2607 // perform a tail call.
2608 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2609 return false;
2610 TCChain = Copy->getOperand(0);
2611 } else {
2612 return false;
2613 }
2614
2615 bool HasRet = false;
2616 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2617 UI != UE; ++UI) {
2618 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2619 UI->getOpcode() != ARMISD::INTRET_FLAG)
2620 return false;
2621 HasRet = true;
2622 }
2623
2624 if (!HasRet)
2625 return false;
2626
2627 Chain = TCChain;
2628 return true;
2629}
2630
2631bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2632 if (!Subtarget->supportsTailCall())
2633 return false;
2634
2635 auto Attr =
2636 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2637 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2638 return false;
2639
2640 return true;
2641}
2642
2643// Writing a 64-bit value requires splitting it into two 32-bit halves first,
2644// then passing the low and high parts through.
2645static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2646 SDLoc DL(Op);
2647 SDValue WriteValue = Op->getOperand(2);
2648
2649 // This function is only supposed to be called for an i64 type argument.
2650 assert(WriteValue.getValueType() == MVT::i64
2651 && "LowerWRITE_REGISTER called for non-i64 type argument.");
2652
2653 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2654 DAG.getConstant(0, DL, MVT::i32));
2655 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2656 DAG.getConstant(1, DL, MVT::i32));
2657 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2658 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2659}
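// For illustration, the split above is the DAG-level analogue of the following
// scalar sketch for a 64-bit value V (names are illustrative only):
//   uint32_t Lo = (uint32_t)V;          // EXTRACT_ELEMENT index 0 (low half)
//   uint32_t Hi = (uint32_t)(V >> 32);  // EXTRACT_ELEMENT index 1 (high half)
// Both halves are then passed as separate operands of the WRITE_REGISTER node.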
2660
2661// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2662// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2663// one of the above mentioned nodes. It has to be wrapped because otherwise
2664// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2665// be used to form an addressing mode. These wrapped nodes will be selected
2666// into MOVi.
2667SDValue ARMTargetLowering::LowerConstantPool(SDValue Op,
2668 SelectionDAG &DAG) const {
2669 EVT PtrVT = Op.getValueType();
2670 // FIXME there is no actual debug info here
2671 SDLoc dl(Op);
2672 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2673 SDValue Res;
2674
2675 // When generating execute-only code Constant Pools must be promoted to the
2676 // global data section. It's a bit ugly that we can't share them across basic
2677 // blocks, but this way we guarantee that execute-only behaves correctly with
2678 // position-independent addressing modes.
2679 if (Subtarget->genExecuteOnly()) {
2680 auto AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>();
2681 auto T = const_cast<Type*>(CP->getType());
2682 auto C = const_cast<Constant*>(CP->getConstVal());
2683 auto M = const_cast<Module*>(DAG.getMachineFunction().
2684 getFunction()->getParent());
2685 auto GV = new GlobalVariable(
2686 *M, T, /*isConst=*/true, GlobalVariable::InternalLinkage, C,
2687 Twine(DAG.getDataLayout().getPrivateGlobalPrefix()) + "CP" +
2688 Twine(DAG.getMachineFunction().getFunctionNumber()) + "_" +
2689 Twine(AFI->createPICLabelUId())
2690 );
2691 SDValue GA = DAG.getTargetGlobalAddress(dyn_cast<GlobalValue>(GV),
2692 dl, PtrVT);
2693 return LowerGlobalAddress(GA, DAG);
2694 }
2695
2696 if (CP->isMachineConstantPoolEntry())
2697 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2698 CP->getAlignment());
2699 else
2700 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2701 CP->getAlignment());
2702 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2703}
2704
2705unsigned ARMTargetLowering::getJumpTableEncoding() const {
2706 return MachineJumpTableInfo::EK_Inline;
2707}
2708
2709SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2710 SelectionDAG &DAG) const {
2711 MachineFunction &MF = DAG.getMachineFunction();
2712 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2713 unsigned ARMPCLabelIndex = 0;
2714 SDLoc DL(Op);
2715 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2716 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2717 SDValue CPAddr;
2718 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2719 if (!IsPositionIndependent) {
2720 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2721 } else {
2722 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2723 ARMPCLabelIndex = AFI->createPICLabelUId();
2724 ARMConstantPoolValue *CPV =
2725 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2726 ARMCP::CPBlockAddress, PCAdj);
2727 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2728 }
2729 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2730 SDValue Result = DAG.getLoad(
2731 PtrVT, DL, DAG.getEntryNode(), CPAddr,
2732 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2733 if (!IsPositionIndependent)
2734 return Result;
2735 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2736 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2737}
2738
2739/// \brief Convert a TLS address reference into the correct sequence of loads
2740/// and calls to compute the variable's address for Darwin, and return an
2741/// SDValue containing the final node.
2742
2743/// Darwin only has one TLS scheme which must be capable of dealing with the
2744/// fully general situation, in the worst case. This means:
2745/// + "extern __thread" declaration.
2746/// + Defined in a possibly unknown dynamic library.
2747///
2748/// The general system is that each __thread variable has a [3 x i32] descriptor
2749/// which contains information used by the runtime to calculate the address. The
2750/// only part of this that the compiler needs to know about is the first word, which
2751/// contains a function pointer that must be called with the address of the
2752/// entire descriptor in "r0".
2753///
2754/// Since this descriptor may be in a different unit, in general access must
2755/// proceed along the usual ARM rules. A common sequence to produce is:
2756///
2757/// movw rT1, :lower16:_var$non_lazy_ptr
2758/// movt rT1, :upper16:_var$non_lazy_ptr
2759/// ldr r0, [rT1]
2760/// ldr rT2, [r0]
2761/// blx rT2
2762/// [...address now in r0...]
2763SDValue
2764ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2765 SelectionDAG &DAG) const {
2766 assert(Subtarget->isTargetDarwin() &&
2767 "This function expects a Darwin target");
2768 SDLoc DL(Op);
2769
2770 // The first step is to get the address of the actual global symbol. This is where
2771 // the TLS descriptor lives.
2772 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2773
2774 // The first entry in the descriptor is a function pointer that we must call
2775 // to obtain the address of the variable.
2776 SDValue Chain = DAG.getEntryNode();
2777 SDValue FuncTLVGet = DAG.getLoad(
2778 MVT::i32, DL, Chain, DescAddr,
2779 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2780 /* Alignment = */ 4,
2781 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2782 MachineMemOperand::MOInvariant);
2783 Chain = FuncTLVGet.getValue(1);
2784
2785 MachineFunction &F = DAG.getMachineFunction();
2786 MachineFrameInfo &MFI = F.getFrameInfo();
2787 MFI.setAdjustsStack(true);
2788
2789 // TLS calls preserve all registers except those that absolutely must be
2790 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2791 // silly).
2792 auto TRI =
2793 getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2794 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2795 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2796
2797 // Finally, we can make the call. This is just a degenerate version of a
2798 // normal ARM call node: r0 takes the address of the descriptor, and
2799 // returns the address of the variable in this thread.
2800 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2801 Chain =
2802 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2803 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2804 DAG.getRegisterMask(Mask), Chain.getValue(1));
2805 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2806}
2807
2808SDValue
2809ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2810 SelectionDAG &DAG) const {
2811 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2812
2813 SDValue Chain = DAG.getEntryNode();
2814 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2815 SDLoc DL(Op);
2816
2817 // Load the current TEB (thread environment block)
2818 SDValue Ops[] = {Chain,
2819 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2820 DAG.getConstant(15, DL, MVT::i32),
2821 DAG.getConstant(0, DL, MVT::i32),
2822 DAG.getConstant(13, DL, MVT::i32),
2823 DAG.getConstant(0, DL, MVT::i32),
2824 DAG.getConstant(2, DL, MVT::i32)};
2825 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2826 DAG.getVTList(MVT::i32, MVT::Other), Ops);
2827
2828 SDValue TEB = CurrentTEB.getValue(0);
2829 Chain = CurrentTEB.getValue(1);
2830
2831 // Load the ThreadLocalStoragePointer from the TEB
2832 // A pointer to the TLS array is located at offset 0x2c from the TEB.
2833 SDValue TLSArray =
2834 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2835 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2836
2837 // The pointer to the thread's TLS data area is located at the TLS index
2838 // (scaled by 4) into the TLSArray.
2839
2840 // Load the TLS index from the C runtime
2841 SDValue TLSIndex =
2842 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2843 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2844 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2845
2846 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2847 DAG.getConstant(2, DL, MVT::i32));
2848 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2849 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2850 MachinePointerInfo());
2851
2852 // Get the offset of the start of the .tls section (section base)
2853 const auto *GA = cast<GlobalAddressSDNode>(Op);
2854 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2855 SDValue Offset = DAG.getLoad(
2856 PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2857 DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2858 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2859
2860 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2861}
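// As a rough sketch, the lowering above computes the final address like the
// following C-style pseudocode (the helper name readTEB is purely illustrative):
//   char  *TEB      = readTEB();                 // MRC p15, 0, <Rd>, c13, c0, 2
//   char **TLSArray = *(char ***)(TEB + 0x2c);   // ThreadLocalStoragePointer
//   char  *TLSBlock = TLSArray[_tls_index];      // this module's TLS data area
//   void  *Addr     = TLSBlock + SECREL(var);    // section-relative offset of var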
2862
2863// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2864SDValue
2865ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2866 SelectionDAG &DAG) const {
2867 SDLoc dl(GA);
2868 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2869 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2870 MachineFunction &MF = DAG.getMachineFunction();
2871 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2872 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2873 ARMConstantPoolValue *CPV =
2874 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2875 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2876 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2877 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2878 Argument = DAG.getLoad(
2879 PtrVT, dl, DAG.getEntryNode(), Argument,
2880 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2881 SDValue Chain = Argument.getValue(1);
2882
2883 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2884 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2885
2886 // call __tls_get_addr.
2887 ArgListTy Args;
2888 ArgListEntry Entry;
2889 Entry.Node = Argument;
2890 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2891 Args.push_back(Entry);
2892
2893 // FIXME: is there useful debug info available here?
2894 TargetLowering::CallLoweringInfo CLI(DAG);
2895 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2896 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2897 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2898
2899 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2900 return CallResult.first;
2901}
2902
2903// Lower ISD::GlobalTLSAddress using the "initial exec" or
2904// "local exec" model.
2905SDValue
2906ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2907 SelectionDAG &DAG,
2908 TLSModel::Model model) const {
2909 const GlobalValue *GV = GA->getGlobal();
2910 SDLoc dl(GA);
2911 SDValue Offset;
2912 SDValue Chain = DAG.getEntryNode();
2913 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2914 // Get the Thread Pointer
2915 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2916
2917 if (model == TLSModel::InitialExec) {
2918 MachineFunction &MF = DAG.getMachineFunction();
2919 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2920 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2921 // Initial exec model.
2922 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2923 ARMConstantPoolValue *CPV =
2924 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2925 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2926 true);
2927 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2928 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2929 Offset = DAG.getLoad(
2930 PtrVT, dl, Chain, Offset,
2931 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2932 Chain = Offset.getValue(1);
2933
2934 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2935 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2936
2937 Offset = DAG.getLoad(
2938 PtrVT, dl, Chain, Offset,
2939 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2940 } else {
2941 // local exec model
2942 assert(model == TLSModel::LocalExec);
2943 ARMConstantPoolValue *CPV =
2944 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2945 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2946 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2947 Offset = DAG.getLoad(
2948 PtrVT, dl, Chain, Offset,
2949 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2950 }
2951
2952 // The address of the thread local variable is the add of the thread
2953 // pointer with the offset of the variable.
2954 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2955}
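// Sketch of the two address computations built above, with TP denoting the
// value produced by the ARMISD::THREAD_POINTER node:
//   Local exec:   Addr = TP + TPOFF(var)      // offset is a link-time constant
//   Initial exec: Addr = TP + *GOTTPOFF(var)  // offset loaded through the GOT
// In the initial-exec case the constant-pool value is first adjusted with
// PIC_ADD before the GOT entry is dereferenced.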
2956
2957SDValue
2958ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2959 if (Subtarget->isTargetDarwin())
2960 return LowerGlobalTLSAddressDarwin(Op, DAG);
2961
2962 if (Subtarget->isTargetWindows())
2963 return LowerGlobalTLSAddressWindows(Op, DAG);
2964
2965 // TODO: implement the "local dynamic" model
2966 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2967 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2968 if (DAG.getTarget().Options.EmulatedTLS)
2969 return LowerToTLSEmulatedModel(GA, DAG);
2970
2971 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2972
2973 switch (model) {
2974 case TLSModel::GeneralDynamic:
2975 case TLSModel::LocalDynamic:
2976 return LowerToTLSGeneralDynamicModel(GA, DAG);
2977 case TLSModel::InitialExec:
2978 case TLSModel::LocalExec:
2979 return LowerToTLSExecModels(GA, DAG, model);
2980 }
2981 llvm_unreachable("bogus TLS model");
2982}
2983
2984/// Return true if all users of V are within function F, looking through
2985/// ConstantExprs.
2986static bool allUsersAreInFunction(const Value *V, const Function *F) {
2987 SmallVector<const User*,4> Worklist;
2988 for (auto *U : V->users())
2989 Worklist.push_back(U);
2990 while (!Worklist.empty()) {
2991 auto *U = Worklist.pop_back_val();
2992 if (isa<ConstantExpr>(U)) {
2993 for (auto *UU : U->users())
2994 Worklist.push_back(UU);
2995 continue;
2996 }
2997
2998 auto *I = dyn_cast<Instruction>(U);
2999 if (!I || I->getParent()->getParent() != F)
3000 return false;
3001 }
3002 return true;
3003}
3004
3005/// Return true if all users of V are within some (any) function, looking through
3006/// ConstantExprs. In other words, return true only if there are no global constant users.
3007static bool allUsersAreInFunctions(const Value *V) {
3008 SmallVector<const User*,4> Worklist;
3009 for (auto *U : V->users())
3010 Worklist.push_back(U);
3011 while (!Worklist.empty()) {
3012 auto *U = Worklist.pop_back_val();
3013 if (isa<ConstantExpr>(U)) {
3014 for (auto *UU : U->users())
3015 Worklist.push_back(UU);
3016 continue;
3017 }
3018
3019 if (!isa<Instruction>(U))
3020 return false;
3021 }
3022 return true;
3023}
3024
3025// Return true if T is an integer, float or an array/vector of either.
3026static bool isSimpleType(Type *T) {
3027 if (T->isIntegerTy() || T->isFloatingPointTy())
3028 return true;
3029 Type *SubT = nullptr;
3030 if (T->isArrayTy())
3031 SubT = T->getArrayElementType();
3032 else if (T->isVectorTy())
3033 SubT = T->getVectorElementType();
3034 else
3035 return false;
3036 return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3037}
3038
3039static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3040 EVT PtrVT, const SDLoc &dl) {
3041 // If we're creating a pool entry for a constant global with unnamed address,
3042 // and the global is small enough, we can emit it inline into the constant pool
3043 // to save ourselves an indirection.
3044 //
3045 // This is a win if the constant is only used in one function (so it doesn't
3046 // need to be duplicated) or duplicating the constant wouldn't increase code
3047 // size (implying the constant is no larger than 4 bytes).
3048 const Function *F = DAG.getMachineFunction().getFunction();
3049
3050 // We rely on this decision to inline being idempotent and unrelated to the
3051 // use-site. We know that if we inline a variable at one use site, we'll
3052 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3053 // doesn't know about this optimization, so bail out if it's enabled;
3054 // otherwise we could decide to inline here (and thus never emit the GV) but
3055 // still require the GV from fast-isel generated code.
3056 if (!EnableConstpoolPromotion ||
3057 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3058 return SDValue();
3059
3060 auto *GVar = dyn_cast<GlobalVariable>(GV);
3061 if (!GVar || !GVar->hasInitializer() ||
3062 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3063 !GVar->hasLocalLinkage())
3064 return SDValue();
3065
3066 // Ensure that we don't try to inline any type that contains pointers. If
3067 // we inline a value that contains relocations, we move the relocations from
3068 // .data to .text which is not ideal.
3069 auto *Init = GVar->getInitializer();
3070 if (!isSimpleType(Init->getType()))
3071 return SDValue();
3072
3073 // The constant islands pass can only really deal with alignment requests
3074 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3075 // any type with alignment requirements greater than 4 bytes. We also
3076 // can only promote constants that are multiples of 4 bytes in size or
3077 // are paddable to a multiple of 4. Currently we only try to pad constants
3078 // that are strings for simplicity.
3079 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3080 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3081 unsigned Align = GVar->getAlignment();
3082 unsigned RequiredPadding = 4 - (Size % 4);
3083 bool PaddingPossible =
3084 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3085 if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3086 Size == 0)
3087 return SDValue();
3088
3089 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3090 MachineFunction &MF = DAG.getMachineFunction();
3091 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3092
3093 // We can't bloat the constant pool too much, else the ConstantIslands pass
3094 // may fail to converge. If we haven't promoted this global yet (it may have
3095 // multiple uses), and promoting it would increase the constant pool size (Sz
3096 // > 4), ensure we have space to do so up to MaxTotal.
3097 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3098 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3099 ConstpoolPromotionMaxTotal)
3100 return SDValue();
3101
3102 // This is only valid if all users are in a single function OR it has users
3103 // in multiple functions but it is no larger than a pointer. We also check if
3104 // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3105 // address taken.
3106 if (!allUsersAreInFunction(GVar, F) &&
3107 !(Size <= 4 && allUsersAreInFunctions(GVar)))
3108 return SDValue();
3109
3110 // We're going to inline this global. Pad it out if needed.
3111 if (RequiredPadding != 4) {
3112 StringRef S = CDAInit->getAsString();
3113
3114 SmallVector<uint8_t,16> V(S.size());
3115 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3116 while (RequiredPadding--)
3117 V.push_back(0);
3118 Init = ConstantDataArray::get(*DAG.getContext(), V);
3119 }
3120
3121 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3122 SDValue CPAddr =
3123 DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3124 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3125 AFI->markGlobalAsPromotedToConstantPool(GVar);
3126 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3127 PaddedSize - 4);
3128 }
3129 ++NumConstpoolPromoted;
3130 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3131}
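// Worked example of the padding logic above: a 7-byte string initializer gives
// Size == 7 and RequiredPadding == 4 - (7 % 4) == 1, so one zero byte is
// appended and PaddedSize == 8. A 12-byte initializer gives RequiredPadding ==
// 4, meaning it is already a multiple of 4 and is emitted unpadded.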
3132
3133bool ARMTargetLowering::isReadOnly(const GlobalValue *GV) const {
3134 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3135 GV = GA->getBaseObject();
3136 return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3137 isa<Function>(GV);
3138}
3139
3140SDValue ARMTargetLowering::LowerGlobalAddress(SDValue Op,
3141 SelectionDAG &DAG) const {
3142 switch (Subtarget->getTargetTriple().getObjectFormat()) {
3143 default: llvm_unreachable("unknown object format");
3144 case Triple::COFF:
3145 return LowerGlobalAddressWindows(Op, DAG);
3146 case Triple::ELF:
3147 return LowerGlobalAddressELF(Op, DAG);
3148 case Triple::MachO:
3149 return LowerGlobalAddressDarwin(Op, DAG);
3150 }
3151}
3152
3153SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3154 SelectionDAG &DAG) const {
3155 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3156 SDLoc dl(Op);
3157 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3158 const TargetMachine &TM = getTargetMachine();
3159 bool IsRO = isReadOnly(GV);
3160
3161 // Call promoteToConstantPool only if we are not generating an execute-only (XO) text section.
3162 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3163 if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3164 return V;
3165
3166 if (isPositionIndependent()) {
3167 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3168 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3169 UseGOT_PREL ? ARMII::MO_GOT : 0);
3170 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3171 if (UseGOT_PREL)
3172 Result =
3173 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3174 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3175 return Result;
3176 } else if (Subtarget->isROPI() && IsRO) {
3177 // PC-relative.
3178 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3179 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3180 return Result;
3181 } else if (Subtarget->isRWPI() && !IsRO) {
3182 // SB-relative.
3183 SDValue RelAddr;
3184 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3185 ++NumMovwMovt;
3186 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3187 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3188 } else { // use literal pool for address constant
3189 ARMConstantPoolValue *CPV =
3190 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3191 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3192 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3193 RelAddr = DAG.getLoad(
3194 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3195 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3196 }
3197 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3198 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3199 return Result;
3200 }
3201
3202 // If we have T2 ops, we can materialize the address directly via movt/movw
3203 // pair. This is always cheaper.
3204 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3205 ++NumMovwMovt;
3206 // FIXME: Once remat is capable of dealing with instructions with register
3207 // operands, expand this into two nodes.
3208 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3209 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3210 } else {
3211 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3212 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3213 return DAG.getLoad(
3214 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3215 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3216 }
3217}
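// For illustration, when movw/movt are available the wrapped address above is
// typically materialized as a two-instruction sequence such as
//   movw r0, :lower16:global
//   movt r0, :upper16:global
// whereas the literal-pool fallback turns into a PC-relative ldr from the
// constant pool entry created above.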
3218
3219SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3220 SelectionDAG &DAG) const {
3221 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3222 "ROPI/RWPI not currently supported for Darwin");
3223 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3224 SDLoc dl(Op);
3225 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3226
3227 if (Subtarget->useMovt(DAG.getMachineFunction()))
3228 ++NumMovwMovt;
3229
3230 // FIXME: Once remat is capable of dealing with instructions with register
3231 // operands, expand this into multiple nodes
3232 unsigned Wrapper =
3233 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3234
3235 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3236 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3237
3238 if (Subtarget->isGVIndirectSymbol(GV))
3239 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3240 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3241 return Result;
3242}
3243
3244SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3245 SelectionDAG &DAG) const {
3246 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3247 assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3248 "Windows on ARM expects to use movw/movt");
3249 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3250 "ROPI/RWPI not currently supported for Windows");
3251
3252 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3253 const ARMII::TOF TargetFlags =
3254 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3255 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3256 SDValue Result;
3257 SDLoc DL(Op);
3258
3259 ++NumMovwMovt;
3260
3261 // FIXME: Once remat is capable of dealing with instructions with register
3262 // operands, expand this into two nodes.
3263 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3264 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3265 TargetFlags));
3266 if (GV->hasDLLImportStorageClass())
3267 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3268 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3269 return Result;
3270}
3271
3272SDValue
3273ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3274 SDLoc dl(Op);
3275 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3276 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3277 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3278 Op.getOperand(1), Val);
3279}
3280
3281SDValue
3282ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3283 SDLoc dl(Op);
3284 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3285 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3286}
3287
3288SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3289 SelectionDAG &DAG) const {
3290 SDLoc dl(Op);
3291 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3292 Op.getOperand(0));
3293}
3294
3295SDValue
3296ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3297 const ARMSubtarget *Subtarget) const {
3298 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3299 SDLoc dl(Op);
3300 switch (IntNo) {
3301 default: return SDValue(); // Don't custom lower most intrinsics.
3302 case Intrinsic::thread_pointer: {
3303 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3304 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3305 }
3306 case Intrinsic::eh_sjlj_lsda: {
3307 MachineFunction &MF = DAG.getMachineFunction();
3308 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3309 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3310 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3311 SDValue CPAddr;
3312 bool IsPositionIndependent = isPositionIndependent();
3313 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3314 ARMConstantPoolValue *CPV =
3315 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3316 ARMCP::CPLSDA, PCAdj);
3317 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3318 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3319 SDValue Result = DAG.getLoad(
3320 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3321 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3322
3323 if (IsPositionIndependent) {
3324 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3325 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3326 }
3327 return Result;
3328 }
3329 case Intrinsic::arm_neon_vabs:
3330 return DAG.getNode(ISD::ABS, SDLoc(Op), Op.getValueType(),
3331 Op.getOperand(1));
3332 case Intrinsic::arm_neon_vmulls:
3333 case Intrinsic::arm_neon_vmullu: {
3334 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3335 ? ARMISD::VMULLs : ARMISD::VMULLu;
3336 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3337 Op.getOperand(1), Op.getOperand(2));
3338 }
3339 case Intrinsic::arm_neon_vminnm:
3340 case Intrinsic::arm_neon_vmaxnm: {
3341 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3342 ? ISD::FMINNUM : ISD::FMAXNUM;
3343 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3344 Op.getOperand(1), Op.getOperand(2));
3345 }
3346 case Intrinsic::arm_neon_vminu:
3347 case Intrinsic::arm_neon_vmaxu: {
3348 if (Op.getValueType().isFloatingPoint())
3349 return SDValue();
3350 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3351 ? ISD::UMIN : ISD::UMAX;
3352 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3353 Op.getOperand(1), Op.getOperand(2));
3354 }
3355 case Intrinsic::arm_neon_vmins:
3356 case Intrinsic::arm_neon_vmaxs: {
3357 // v{min,max}s is overloaded between signed integers and floats.
3358 if (!Op.getValueType().isFloatingPoint()) {
3359 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3360 ? ISD::SMIN : ISD::SMAX;
3361 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3362 Op.getOperand(1), Op.getOperand(2));
3363 }
3364 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3365 ? ISD::FMINNAN : ISD::FMAXNAN;
3366 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3367 Op.getOperand(1), Op.getOperand(2));
3368 }
3369 case Intrinsic::arm_neon_vtbl1:
3370 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3371 Op.getOperand(1), Op.getOperand(2));
3372 case Intrinsic::arm_neon_vtbl2:
3373 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3374 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3375 }
3376}
3377
3378static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3379 const ARMSubtarget *Subtarget) {
3380 SDLoc dl(Op);
3381 ConstantSDNode *SSIDNode = cast<ConstantSDNode>(Op.getOperand(2));
3382 auto SSID = static_cast<SyncScope::ID>(SSIDNode->getZExtValue());
3383 if (SSID == SyncScope::SingleThread)
3384 return Op;
3385
3386 if (!Subtarget->hasDataBarrier()) {
3387 // Some ARMv6 CPUs can support data barriers with an MCR instruction.
3388 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3389 // here.
3390 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3391 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3392 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3393 DAG.getConstant(0, dl, MVT::i32));
3394 }
3395
3396 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3397 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3398 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3399 if (Subtarget->isMClass()) {
3400 // Only a full system barrier exists in the M-class architectures.
3401 Domain = ARM_MB::SY;
3402 } else if (Subtarget->preferISHSTBarriers() &&
3403 Ord == AtomicOrdering::Release) {
3404 // Swift happens to implement ISHST barriers in a way that's compatible with
3405 // Release semantics but weaker than ISH so we'd be fools not to use
3406 // it. Beware: other processors probably don't!
3407 Domain = ARM_MB::ISHST;
3408 }
3409
3410 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3411 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3412 DAG.getConstant(Domain, dl, MVT::i32));
3413}
3414
3415static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3416 const ARMSubtarget *Subtarget) {
3417 // ARM pre-v5TE and Thumb1 do not have preload instructions.
3418 if (!(Subtarget->isThumb2() ||
3419 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3420 // Just preserve the chain.
3421 return Op.getOperand(0);
3422
3423 SDLoc dl(Op);
3424 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3425 if (!isRead &&
3426 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3427 // ARMv7 with MP extension has PLDW.
3428 return Op.getOperand(0);
3429
3430 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3431 if (Subtarget->isThumb()) {
3432 // Invert the bits.
3433 isRead = ~isRead & 1;
3434 isData = ~isData & 1;
3435 }
3436
3437 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3438 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3439 DAG.getConstant(isData, dl, MVT::i32));
3440}
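// Sketch of the operand handling above for a data-read prefetch (rw operand
// == 0, cache-type operand == 1): isRead becomes 1 and a PLD is selected. A
// write prefetch (rw == 1) is only kept when the v7 MP extension is present,
// where it selects PLDW; otherwise only the chain is preserved and no preload
// instruction is emitted.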
3441
3442static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3443 MachineFunction &MF = DAG.getMachineFunction();
3444 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3445
3446 // vastart just stores the address of the VarArgsFrameIndex slot into the
3447 // memory location argument.
3448 SDLoc dl(Op);
3449 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3450 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3451 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3452 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3453 MachinePointerInfo(SV));
3454}
3455
3456SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3457 CCValAssign &NextVA,
3458 SDValue &Root,
3459 SelectionDAG &DAG,
3460 const SDLoc &dl) const {
3461 MachineFunction &MF = DAG.getMachineFunction();
3462 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3463
3464 const TargetRegisterClass *RC;
3465 if (AFI->isThumb1OnlyFunction())
3466 RC = &ARM::tGPRRegClass;
3467 else
3468 RC = &ARM::GPRRegClass;
3469
3470 // Transform the arguments stored in physical registers into virtual ones.
3471 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3472 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3473
3474 SDValue ArgValue2;
3475 if (NextVA.isMemLoc()) {
3476 MachineFrameInfo &MFI = MF.getFrameInfo();
3477 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3478
3479 // Create load node to retrieve arguments from the stack.
3480 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3481 ArgValue2 = DAG.getLoad(
3482 MVT::i32, dl, Root, FIN,
3483 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3484 } else {
3485 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3486 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3487 }
3488 if (!Subtarget->isLittle())
3489 std::swap (ArgValue, ArgValue2);
3490 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3491}
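// Illustration of the reassembly above: an f64 formal argument split across
// two GPRs (say r0/r1) is rebuilt with a single VMOVDRR, roughly
//   vmov d0, r0, r1
// in the little-endian case; on big-endian targets the two i32 halves are
// swapped first so the register pair still forms the correct double value.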
3492
3493// The remaining GPRs hold either the beginning of variable-argument
3494// data, or the beginning of an aggregate passed by value (usually
3495// byval). Either way, we allocate stack slots adjacent to the data
3496// provided by our caller, and store the unallocated registers there.
3497// If this is a variadic function, the va_list pointer will begin with
3498// these values; otherwise, this reassembles a (byval) structure that
3499// was split between registers and memory.
3500// Return: the frame index the registers were stored into.
3501int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3502 const SDLoc &dl, SDValue &Chain,
3503 const Value *OrigArg,
3504 unsigned InRegsParamRecordIdx,
3505 int ArgOffset, unsigned ArgSize) const {
3506 // Currently, two use-cases are possible:
3507 // Case #1. Non-var-args function, and we meet the first byval parameter.
3508 // Set up the first unallocated register as the first byval register;
3509 // eat all remaining registers
3510 // (these two actions are performed by the HandleByVal method).
3511 // Then, here, we initialize the stack frame with
3512 // "store-reg" instructions.
3513 // Case #2. Var-args function that doesn't contain byval parameters.
3514 // The same: eat all remaining unallocated registers and
3515 // initialize the stack frame.
3516
3517 MachineFunction &MF = DAG.getMachineFunction();
3518 MachineFrameInfo &MFI = MF.getFrameInfo();
3519 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3520 unsigned RBegin, REnd;
3521 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3522 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3523 } else {
3524 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3525 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3526 REnd = ARM::R4;
3527 }
3528
3529 if (REnd != RBegin)
3530 ArgOffset = -4 * (ARM::R4 - RBegin);
3531
3532 auto PtrVT = getPointerTy(DAG.getDataLayout());
3533 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3534 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3535
3536 SmallVector<SDValue, 4> MemOps;
3537 const TargetRegisterClass *RC =
3538 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3539
3540 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3541 unsigned VReg = MF.addLiveIn(Reg, RC);
3542 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3543 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3544 MachinePointerInfo(OrigArg, 4 * i));
3545 MemOps.push_back(Store);
3546 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3547 }
3548
3549 if (!MemOps.empty())
3550 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3551 return FrameIndex;
3552}
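// Worked example of the register range above: if a byval argument was split so
// that r2 and r3 carry its first eight bytes, then RBegin == r2, REnd == r4 and
// ArgOffset becomes -4 * (r4 - r2) == -8, so both registers are stored into a
// fixed stack object placed immediately below the incoming stack arguments.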
3553
3554// Set up the stack frame that the va_list pointer will start from.
3555void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3556 const SDLoc &dl, SDValue &Chain,
3557 unsigned ArgOffset,
3558 unsigned TotalArgRegsSaveSize,
3559 bool ForceMutable) const {
3560 MachineFunction &MF = DAG.getMachineFunction();
3561 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3562
3563 // Try to store any remaining integer argument regs
3564 // to their spots on the stack so that they may be loaded by dereferencing
3565 // the result of va_next.
3566 // If there are no regs to be stored, just point the address after the last
3567 // argument passed via the stack.
3568 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3569 CCInfo.getInRegsParamsCount(),
3570 CCInfo.getNextStackOffset(), 4);
3571 AFI->setVarArgsFrameIndex(FrameIndex);
3572}
3573
3574SDValue ARMTargetLowering::LowerFormalArguments(
3575 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3576 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3577 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3578 MachineFunction &MF = DAG.getMachineFunction();
3579 MachineFrameInfo &MFI = MF.getFrameInfo();
3580
3581 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3582
3583 // Assign locations to all of the incoming arguments.
3584 SmallVector<CCValAssign, 16> ArgLocs;
3585 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3586 *DAG.getContext());
3587 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3588
3589 SmallVector<SDValue, 16> ArgValues;
3590 SDValue ArgValue;
3591 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3592 unsigned CurArgIdx = 0;
3593
3594 // Initially ArgRegsSaveSize is zero.
3595 // Then we increase this value each time we meet a byval parameter.
3596 // We also increase this value in the case of a varargs function.
3597 AFI->setArgRegsSaveSize(0);
3598
3599 // Calculate the amount of stack space that we need to allocate to store
3600 // byval and variadic arguments that are passed in registers.
3601 // We need to know this before we allocate the first byval or variadic
3602 // argument, as they will be allocated a stack slot below the CFA (Canonical
3603 // Frame Address, the stack pointer at entry to the function).
3604 unsigned ArgRegBegin = ARM::R4;
3605 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3606 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3607 break;
3608
3609 CCValAssign &VA = ArgLocs[i];
3610 unsigned Index = VA.getValNo();
3611 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3612 if (!Flags.isByVal())
3613 continue;
3614
3615 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3616 unsigned RBegin, REnd;
3617 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3618 ArgRegBegin = std::min(ArgRegBegin, RBegin);
3619
3620 CCInfo.nextInRegsParam();
3621 }
3622 CCInfo.rewindByValRegsInfo();
3623
3624 int lastInsIndex = -1;
3625 if (isVarArg && MFI.hasVAStart()) {
3626 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3627 if (RegIdx != array_lengthof(GPRArgRegs))
3628 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3629 }
3630
3631 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3632 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3633 auto PtrVT = getPointerTy(DAG.getDataLayout());
3634
3635 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3636 CCValAssign &VA = ArgLocs[i];
3637 if (Ins[VA.getValNo()].isOrigArg()) {
3638 std::advance(CurOrigArg,
3639 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3640 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3641 }
3642 // Arguments stored in registers.
3643 if (VA.isRegLoc()) {
3644 EVT RegVT = VA.getLocVT();
3645
3646 if (VA.needsCustom()) {
3647 // f64 and vector types are split up into multiple registers or
3648 // combinations of registers and stack slots.
3649 if (VA.getLocVT() == MVT::v2f64) {
3650 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3651 Chain, DAG, dl);
3652 VA = ArgLocs[++i]; // skip ahead to next loc
3653 SDValue ArgValue2;
3654 if (VA.isMemLoc()) {
3655 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3656 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3657 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3658 MachinePointerInfo::getFixedStack(
3659 DAG.getMachineFunction(), FI));
3660 } else {
3661 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3662 Chain, DAG, dl);
3663 }
3664 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3665 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3666 ArgValue, ArgValue1,
3667 DAG.getIntPtrConstant(0, dl));
3668 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3669 ArgValue, ArgValue2,
3670 DAG.getIntPtrConstant(1, dl));
3671 } else
3672 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3673 } else {
3674 const TargetRegisterClass *RC;
3675
3676 if (RegVT == MVT::f32)
3677 RC = &ARM::SPRRegClass;
3678 else if (RegVT == MVT::f64)
3679 RC = &ARM::DPRRegClass;
3680 else if (RegVT == MVT::v2f64)
3681 RC = &ARM::QPRRegClass;
3682 else if (RegVT == MVT::i32)
3683 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3684 : &ARM::GPRRegClass;
3685 else
3686 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3687
3688 // Transform the arguments in physical registers into virtual ones.
3689 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3690 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3691 }
3692
3693 // If this is an 8 or 16-bit value, it is really passed promoted
3694 // to 32 bits. Insert an assert[sz]ext to capture this, then
3695 // truncate to the right size.
3696 switch (VA.getLocInfo()) {
3697 default: llvm_unreachable("Unknown loc info!");
3698 case CCValAssign::Full: break;
3699 case CCValAssign::BCvt:
3700 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3701 break;
3702 case CCValAssign::SExt:
3703 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3704 DAG.getValueType(VA.getValVT()));
3705 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3706 break;
3707 case CCValAssign::ZExt:
3708 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3709 DAG.getValueType(VA.getValVT()));
3710 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3711 break;
3712 }
3713
3714 InVals.push_back(ArgValue);
3715 } else { // VA.isRegLoc()
3716 // sanity check
3717 assert(VA.isMemLoc());
3718 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3719
3720 int index = VA.getValNo();
3721
3722 // Some Ins[] entries become multiple ArgLoc[] entries.
3723 // Process them only once.
3724 if (index != lastInsIndex)
3725 {
3726 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3727 // FIXME: For now, all byval parameter objects are marked mutable.
3728 // This can be changed with more analysis.
3729 // In case of tail call optimization, mark all arguments mutable,
3730 // since they could be overwritten by the lowering of arguments in
3731 // case of a tail call.
3732 if (Flags.isByVal()) {
3733 assert(Ins[index].isOrigArg() &&
3734 "Byval arguments cannot be implicit");
3735 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3736
3737 int FrameIndex = StoreByValRegs(
3738 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3739 VA.getLocMemOffset(), Flags.getByValSize());
3740 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3741 CCInfo.nextInRegsParam();
3742 } else {
3743 unsigned FIOffset = VA.getLocMemOffset();
3744 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3745 FIOffset, true);
3746
3747 // Create load nodes to retrieve arguments from the stack.
3748 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3749 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3750 MachinePointerInfo::getFixedStack(
3751 DAG.getMachineFunction(), FI)));
3752 }
3753 lastInsIndex = index;
3754 }
3755 }
3756 }
3757
3758 // varargs
3759 if (isVarArg && MFI.hasVAStart())
3760 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3761 CCInfo.getNextStackOffset(),
3762 TotalArgRegsSaveSize);
3763
3764 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3765
3766 return Chain;
3767}
3768
3769/// isFloatingPointZero - Return true if this is +0.0.
3770static bool isFloatingPointZero(SDValue Op) {
3771 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3772 return CFP->getValueAPF().isPosZero();
3773 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3774 // Maybe this has already been legalized into the constant pool?
3775 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3776 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3777 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3778 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3779 return CFP->getValueAPF().isPosZero();
3780 }
3781 } else if (Op->getOpcode() == ISD::BITCAST &&
3782 Op->getValueType(0) == MVT::f64) {
3783 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3784 // created by LowerConstantFP().
3785 SDValue BitcastOp = Op->getOperand(0);
3786 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3787 isNullConstant(BitcastOp->getOperand(0)))
3788 return true;
3789 }
3790 return false;
3791}
3792
3793/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
3794/// the given operands.
3795SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3796 SDValue &ARMcc, SelectionDAG &DAG,
3797 const SDLoc &dl) const {
3798 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3799 unsigned C = RHSC->getZExtValue();
3800 if (!isLegalICmpImmediate(C)) {
3801 // Constant does not fit, try adjusting it by one?
3802 switch (CC) {
3803 default: break;
3804 case ISD::SETLT:
3805 case ISD::SETGE:
3806 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3807 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3808 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3809 }
3810 break;
3811 case ISD::SETULT:
3812 case ISD::SETUGE:
3813 if (C != 0 && isLegalICmpImmediate(C-1)) {
3814 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3815 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3816 }
3817 break;
3818 case ISD::SETLE:
3819 case ISD::SETGT:
3820 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3821 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3822 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3823 }
3824 break;
3825 case ISD::SETULE:
3826 case ISD::SETUGT:
3827 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3828 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3829 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3830 }
3831 break;
3832 }
3833 }
3834 } else if ((ARM_AM::getShiftOpcForNode(LHS.getOpcode()) != ARM_AM::no_shift) &&
3835 (ARM_AM::getShiftOpcForNode(RHS.getOpcode()) == ARM_AM::no_shift)) {
3836 // In ARM and Thumb-2, the compare instructions can shift their second
3837 // operand.
3838 CC = ISD::getSetCCSwappedOperands(CC);
3839 std::swap(LHS, RHS);
3840 }
3841
3842 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3843 ARMISD::NodeType CompareType;
3844 switch (CondCode) {
3845 default:
3846 CompareType = ARMISD::CMP;
3847 break;
3848 case ARMCC::EQ:
3849 case ARMCC::NE:
3850 // Uses only Z Flag
3851 CompareType = ARMISD::CMPZ;
3852 break;
3853 }
3854 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3855 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3856}
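
The immediate adjustment above relies on the fact that, for integers, a strict comparison against C is equivalent to a non-strict comparison against C - 1 (and symmetrically for the other directions). A minimal sketch, assuming for illustration the hypothetical constant 257, which is not directly encodable as an ARM modified immediate while 256 is; the helper names are illustrative:

  #include <cstdint>

  // Illustrative sketch only, not part of ARMISelLowering.cpp: the SETLT -> SETLE
  // rewrite getARMCmp performs, shown on scalar values. Both functions return the
  // same result for every int32_t input.
  bool lessThan257(int32_t x) { return x < 257; }  // original: 257 not encodable
  bool atMost256(int32_t x)   { return x <= 256; } // adjusted: 256 is encodable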
3857
3858/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3859SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3860 SelectionDAG &DAG, const SDLoc &dl,
3861 bool InvalidOnQNaN) const {
3862 assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3863 SDValue Cmp;
3864 SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3865 if (!isFloatingPointZero(RHS))
3866 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3867 else
3868 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3869 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3870}
3871
3872/// duplicateCmp - Glue values can have only one use, so this function
3873/// duplicates a comparison node.
3874SDValue
3875ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3876 unsigned Opc = Cmp.getOpcode();
3877 SDLoc DL(Cmp);
3878 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3879 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3880
3881 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3882 Cmp = Cmp.getOperand(0);
3883 Opc = Cmp.getOpcode();
3884 if (Opc == ARMISD::CMPFP)
3885 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3886 Cmp.getOperand(1), Cmp.getOperand(2));
3887 else {
3888 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3889 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3890 Cmp.getOperand(1));
3891 }
3892 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3893}
3894
3895std::pair<SDValue, SDValue>
3896ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3897 SDValue &ARMcc) const {
3898 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3899
3900 SDValue Value, OverflowCmp;
3901 SDValue LHS = Op.getOperand(0);
3902 SDValue RHS = Op.getOperand(1);
3903 SDLoc dl(Op);
3904
3905 // FIXME: We are currently always generating CMPs because we don't support
3906 // generating CMN through the backend. This is not as good as the natural
3907 // CMP case because it causes a register dependency and cannot be folded
3908 // later.
3909
3910 switch (Op.getOpcode()) {
3911 default:
3912 llvm_unreachable("Unknown overflow instruction!");
3913 case ISD::SADDO:
3914 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3915 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3916 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3917 break;
3918 case ISD::UADDO:
3919 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3920 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3921 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3922 break;
3923 case ISD::SSUBO:
3924 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3925 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3926 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3927 break;
3928 case ISD::USUBO:
3929 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3930 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3931 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3932 break;
3933 } // switch (...)
3934
3935 return std::make_pair(Value, OverflowCmp);
3936}
3937
3938SDValue
3939ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3940 // Let legalize expand this if it isn't a legal type yet.
3941 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3942 return SDValue();
3943
3944 SDValue Value, OverflowCmp;
3945 SDValue ARMcc;
3946 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3947 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3948 SDLoc dl(Op);
3949 // We use 0 and 1 as false and true values.
3950 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3951 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3952 EVT VT = Op.getValueType();
3953
3954 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3955 ARMcc, CCR, OverflowCmp);
3956
3957 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3958 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3959}
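
getARMXALUOOp encodes each overflow check as a compare between the computed value and one of the operands. For the unsigned cases the equivalent scalar checks are simple; a minimal sketch, assuming ordinary 32-bit wraparound arithmetic and illustrative helper names:

  #include <cstdint>

  // Illustrative sketch only, not part of ARMISelLowering.cpp: the conditions
  // the UADDO/USUBO comparisons above encode, written as plain C++.
  bool uaddOverflows(uint32_t a, uint32_t b) { return uint32_t(a + b) < a; } // carry out
  bool usubOverflows(uint32_t a, uint32_t b) { return a < b; }               // borrow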
3960
3961SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3962 SDValue Cond = Op.getOperand(0);
3963 SDValue SelectTrue = Op.getOperand(1);
3964 SDValue SelectFalse = Op.getOperand(2);
3965 SDLoc dl(Op);
3966 unsigned Opc = Cond.getOpcode();
3967
3968 if (Cond.getResNo() == 1 &&
3969 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3970 Opc == ISD::USUBO)) {
3971 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3972 return SDValue();
3973
3974 SDValue Value, OverflowCmp;
3975 SDValue ARMcc;
3976 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3977 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3978 EVT VT = Op.getValueType();
3979
3980 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3981 OverflowCmp, DAG);
3982 }
3983
3984 // Convert:
3985 //
3986 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3987 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3988 //
3989 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3990 const ConstantSDNode *CMOVTrue =
3991 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3992 const ConstantSDNode *CMOVFalse =
3993 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3994
3995 if (CMOVTrue && CMOVFalse) {
3996 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3997 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3998
3999 SDValue True;
4000 SDValue False;
4001 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
4002 True = SelectTrue;
4003 False = SelectFalse;
4004 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
4005 True = SelectFalse;
4006 False = SelectTrue;
4007 }
4008
4009 if (True.getNode() && False.getNode()) {
4010 EVT VT = Op.getValueType();
4011 SDValue ARMcc = Cond.getOperand(2);
4012 SDValue CCR = Cond.getOperand(3);
4013 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
4014 assert(True.getValueType() == VT);
4015 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
4016 }
4017 }
4018 }
4019
4020 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
4021 // undefined bits before doing a full-word comparison with zero.
4022 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4023 DAG.getConstant(1, dl, Cond.getValueType()));
4024
4025 return DAG.getSelectCC(dl, Cond,
4026 DAG.getConstant(0, dl, Cond.getValueType()),
4027 SelectTrue, SelectFalse, ISD::SETNE);
4028}
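
The fold in the comment above is the DAG form of a simple source-level identity; a minimal sketch with illustrative names:

  // Illustrative sketch only, not part of ARMISelLowering.cpp: selecting on a
  // 0/1 value produced by a conditional move is the same as selecting directly
  // on the underlying condition.
  int selectViaBool(bool cond, int t, int f) {
    int asBool = cond ? 1 : 0;   // (cmov 1, 0, cond)
    return asBool ? t : f;       // (select asBool, t, f)  ==  cond ? t : f
  }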
4029
4030static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4031 bool &swpCmpOps, bool &swpVselOps) {
4032 // Start by selecting the GE condition code for opcodes that return true for
4033 // 'equality'
4034 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4035 CC == ISD::SETULE)
4036 CondCode = ARMCC::GE;
4037
4038 // and GT for opcodes that return false for 'equality'.
4039 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4040 CC == ISD::SETULT)
4041 CondCode = ARMCC::GT;
4042
4043 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4044 // to swap the compare operands.
4045 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4046 CC == ISD::SETULT)
4047 swpCmpOps = true;
4048
4049 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4050 // If we have an unordered opcode, we need to swap the operands to the VSEL
4051 // instruction (effectively negating the condition).
4052 //
4053 // This also has the effect of swapping which one of 'less' or 'greater'
4054 // returns true, so we also swap the compare operands. It also switches
4055 // whether we return true for 'equality', so we compensate by picking the
4056 // opposite condition code to our original choice.
4057 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4058 CC == ISD::SETUGT) {
4059 swpCmpOps = !swpCmpOps;
4060 swpVselOps = !swpVselOps;
4061 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4062 }
4063
4064 // 'ordered' is 'anything but unordered', so use the VS condition code and
4065 // swap the VSEL operands.
4066 if (CC == ISD::SETO) {
4067 CondCode = ARMCC::VS;
4068 swpVselOps = true;
4069 }
4070
4071 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4072 // code and swap the VSEL operands.
4073 if (CC == ISD::SETUNE) {
4074 CondCode = ARMCC::EQ;
4075 swpVselOps = true;
4076 }
4077}
4078
4079SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4080 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4081 SDValue Cmp, SelectionDAG &DAG) const {
4082 if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4083 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4084 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4085 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4086 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4087
4088 SDValue TrueLow = TrueVal.getValue(0);
4089 SDValue TrueHigh = TrueVal.getValue(1);
4090 SDValue FalseLow = FalseVal.getValue(0);
4091 SDValue FalseHigh = FalseVal.getValue(1);
4092
4093 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4094 ARMcc, CCR, Cmp);
4095 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4096 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4097
4098 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4099 } else {
4100 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4101 Cmp);
4102 }
4103}
4104
4105static bool isGTorGE(ISD::CondCode CC) {
4106 return CC == ISD::SETGT || CC == ISD::SETGE;
4107}
4108
4109static bool isLTorLE(ISD::CondCode CC) {
4110 return CC == ISD::SETLT || CC == ISD::SETLE;
4111}
4112
4113// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4114// All of these conditions (and their <= and >= counterparts) will do:
4115// x < k ? k : x
4116// x > k ? x : k
4117// k < x ? x : k
4118// k > x ? k : x
4119static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4120 const SDValue TrueVal, const SDValue FalseVal,
4121 const ISD::CondCode CC, const SDValue K) {
4122 return (isGTorGE(CC) &&
4123 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4124 (isLTorLE(CC) &&
4125 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4126}
4127
4128// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4129static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4130 const SDValue TrueVal, const SDValue FalseVal,
4131 const ISD::CondCode CC, const SDValue K) {
4132 return (isGTorGE(CC) &&
4133 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4134 (isLTorLE(CC) &&
4135 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4136}
4137
4138// Check if two chained conditionals could be converted into SSAT.
4139//
4140// SSAT can replace a set of two conditional selectors that bound a number to an
4141// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4142//
4143// x < -k ? -k : (x > k ? k : x)
4144// x < -k ? -k : (x < k ? x : k)
4145// x > -k ? (x > k ? k : x) : -k
4146// x < k ? (x < -k ? -k : x) : k
4147// etc.
4148//
4149// It returns true if the conversion can be done, false otherwise.
4150// Additionally, the variable is returned in parameter V and the constant in K.
4151static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4152 uint64_t &K) {
4153 SDValue LHS1 = Op.getOperand(0);
4154 SDValue RHS1 = Op.getOperand(1);
4155 SDValue TrueVal1 = Op.getOperand(2);
4156 SDValue FalseVal1 = Op.getOperand(3);
4157 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4158
4159 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4160 if (Op2.getOpcode() != ISD::SELECT_CC)
4161 return false;
4162
4163 SDValue LHS2 = Op2.getOperand(0);
4164 SDValue RHS2 = Op2.getOperand(1);
4165 SDValue TrueVal2 = Op2.getOperand(2);
4166 SDValue FalseVal2 = Op2.getOperand(3);
4167 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4168
4169 // Find out which are the constants and which are the variables
4170 // in each conditional
4171 SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4172 ? &RHS1
4173 : nullptr;
4174 SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4175 ? &RHS2
4176 : nullptr;
4177 SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4178 SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4179 SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4180 SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4181
4182 // We must detect cases where the original operations worked with 16- or
4183 // 8-bit values. In such a case, V2Tmp != V2 because the comparison operations
4184 // must work with sign-extended values but the select operations return
4185 // the original non-extended value.
4186 SDValue V2TmpReg = V2Tmp;
4187 if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4188 V2TmpReg = V2Tmp->getOperand(0);
4189
4190 // Check that the registers and the constants have the correct values
4191 // in both conditionals
4192 if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4193 V2TmpReg != V2)
4194 return false;
4195
4196 // Figure out which conditional is saturating the lower/upper bound.
4197 const SDValue *LowerCheckOp =
4198 isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4199 ? &Op
4200 : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4201 ? &Op2
4202 : nullptr;
4203 const SDValue *UpperCheckOp =
4204 isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4205 ? &Op
4206 : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4207 ? &Op2
4208 : nullptr;
4209
4210 if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4211 return false;
4212
4213 // Check that the constant in the lower-bound check is
4214 // the opposite of the constant in the upper-bound check
4215 // in 1's complement.
4216 int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4217 int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4218 int64_t PosVal = std::max(Val1, Val2);
4219
4220 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4221 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4222 Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4223
4224 V = V2;
4225 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4226 return true;
4227 }
4228
4229 return false;
4230}
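
A minimal sketch of a source-level pattern that satisfies these checks, assuming the hypothetical bound k = 127 (so ~k = -128 and k + 1 = 128 is a power of two); the two chained selects bound the value to [-128, 127] and can be collapsed into a single SSAT. The helper name is illustrative:

  #include <cstdint>

  // Illustrative sketch only, not part of ARMISelLowering.cpp: chained selects
  // that clamp a value to [-128, 127], the shape isSaturatingConditional()
  // recognizes.
  int32_t clampToSigned8(int32_t x) {
    return x < -128 ? -128 : (x > 127 ? 127 : x);
  }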
4231
4232SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4233 EVT VT = Op.getValueType();
4234 SDLoc dl(Op);
4235
4236 // Try to convert two saturating conditional selects into a single SSAT
4237 SDValue SatValue;
4238 uint64_t SatConstant;
4239 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4240 isSaturatingConditional(Op, SatValue, SatConstant))
4241 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4242 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4243
4244 SDValue LHS = Op.getOperand(0);
4245 SDValue RHS = Op.getOperand(1);
4246 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4247 SDValue TrueVal = Op.getOperand(2);
4248 SDValue FalseVal = Op.getOperand(3);
4249
4250 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4251 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4252 dl);
4253
4254 // If softenSetCCOperands only returned one value, we should compare it to
4255 // zero.
4256 if (!RHS.getNode()) {
4257 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4258 CC = ISD::SETNE;
4259 }
4260 }
4261
4262 if (LHS.getValueType() == MVT::i32) {
4263 // Try to generate VSEL on ARMv8.
4264 // The VSEL instruction can't use all the usual ARM condition
4265 // codes: it only has two bits to select the condition code, so it's
4266 // constrained to use only GE, GT, VS and EQ.
4267 //
4268 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4269 // swap the operands of the previous compare instruction (effectively
4270 // inverting the compare condition, swapping 'less' and 'greater') and
4271 // sometimes need to swap the operands to the VSEL (which inverts the
4272 // condition in the sense of firing whenever the previous condition didn't)
4273 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4274 TrueVal.getValueType() == MVT::f64)) {
4275 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4276 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4277 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4278 CC = ISD::getSetCCInverse(CC, true);
4279 std::swap(TrueVal, FalseVal);
4280 }
4281 }
4282
4283 SDValue ARMcc;
4284 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4285 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4286 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4287 }
4288
4289 ARMCC::CondCodes CondCode, CondCode2;
4290 bool InvalidOnQNaN;
4291 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4292
4293 // Try to generate VMAXNM/VMINNM on ARMv8.
4294 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4295 TrueVal.getValueType() == MVT::f64)) {
4296 bool swpCmpOps = false;
4297 bool swpVselOps = false;
4298 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4299
4300 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4301 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4302 if (swpCmpOps)
4303 std::swap(LHS, RHS);
4304 if (swpVselOps)
4305 std::swap(TrueVal, FalseVal);
4306 }
4307 }
4308
4309 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4310 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4311 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4312 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4313 if (CondCode2 != ARMCC::AL) {
4314 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4315 // FIXME: Needs another CMP because flag can have but one use.
4316 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4317 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4318 }
4319 return Result;
4320}
4321
4322/// canChangeToInt - Given the fp compare operand, return true if it is suitable
4323/// to morph to an integer compare sequence.
4324static bool canChangeToInt(SDValue Op, bool &SeenZero,
4325 const ARMSubtarget *Subtarget) {
4326 SDNode *N = Op.getNode();
4327 if (!N->hasOneUse())
4328 // Otherwise it requires moving the value from fp to integer registers.
4329 return false;
4330 if (!N->getNumValues())
4331 return false;
4332 EVT VT = Op.getValueType();
4333 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4334 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4335 // vmrs are very slow, e.g. cortex-a8.
4336 return false;
4337
4338 if (isFloatingPointZero(Op)) {
4339 SeenZero = true;
4340 return true;
4341 }
4342 return ISD::isNormalLoad(N);
4343}
4344
4345static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4346 if (isFloatingPointZero(Op))
4347 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4348
4349 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4350 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4351 Ld->getPointerInfo(), Ld->getAlignment(),
4352 Ld->getMemOperand()->getFlags());
4353
4354 llvm_unreachable("Unknown VFP cmp argument!");
4355}
4356
4357static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4358 SDValue &RetVal1, SDValue &RetVal2) {
4359 SDLoc dl(Op);
4360
4361 if (isFloatingPointZero(Op)) {
4362 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4363 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4364 return;
4365 }
4366
4367 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4368 SDValue Ptr = Ld->getBasePtr();
4369 RetVal1 =
4370 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4371 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4372
4373 EVT PtrType = Ptr.getValueType();
4374 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4375 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4376 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4377 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4378 Ld->getPointerInfo().getWithOffset(4), NewAlign,
4379 Ld->getMemOperand()->getFlags());
4380 return;
4381 }
4382
4383 llvm_unreachable("Unknown VFP cmp argument!");
4384}
4385
4386/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4387/// f32 and even f64 comparisons to integer ones.
4388SDValue
4389ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4390 SDValue Chain = Op.getOperand(0);
4391 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4392 SDValue LHS = Op.getOperand(2);
4393 SDValue RHS = Op.getOperand(3);
4394 SDValue Dest = Op.getOperand(4);
4395 SDLoc dl(Op);
4396
4397 bool LHSSeenZero = false;
4398 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4399 bool RHSSeenZero = false;
4400 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4401 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4402 // If unsafe fp math optimization is enabled and there are no other uses of
4403 // the CMP operands, and the condition code is EQ or NE, we can optimize it
4404 // to an integer comparison.
4405 if (CC == ISD::SETOEQ)
4406 CC = ISD::SETEQ;
4407 else if (CC == ISD::SETUNE)
4408 CC = ISD::SETNE;
4409
4410 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4411 SDValue ARMcc;
4412 if (LHS.getValueType() == MVT::f32) {
4413 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4414 bitcastf32Toi32(LHS, DAG), Mask);
4415 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4416 bitcastf32Toi32(RHS, DAG), Mask);
4417 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4418 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4419 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4420 Chain, Dest, ARMcc, CCR, Cmp);
4421 }
4422
4423 SDValue LHS1, LHS2;
4424 SDValue RHS1, RHS2;
4425 expandf64Toi32(LHS, DAG, LHS1, LHS2);
4426 expandf64Toi32(RHS, DAG, RHS1, RHS2);
4427 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4428 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4429 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4430 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4431 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4432 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4433 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4434 }
4435
4436 return SDValue();
4437}
4438
4439SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4440 SDValue Chain = Op.getOperand(0);
4441 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4442 SDValue LHS = Op.getOperand(2);
4443 SDValue RHS = Op.getOperand(3);
4444 SDValue Dest = Op.getOperand(4);
4445 SDLoc dl(Op);
4446
4447 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4448 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4449 dl);
4450
4451 // If softenSetCCOperands only returned one value, we should compare it to
4452 // zero.
4453 if (!RHS.getNode()) {
4454 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4455 CC = ISD::SETNE;
4456 }
4457 }
4458
4459 if (LHS.getValueType() == MVT::i32) {
4460 SDValue ARMcc;
4461 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4462 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4463 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4464 Chain, Dest, ARMcc, CCR, Cmp);
4465 }
4466
4467 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4468
4469 if (getTargetMachine().Options.UnsafeFPMath &&
4470 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4471 CC == ISD::SETNE || CC == ISD::SETUNE)) {
4472 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4473 return Result;
4474 }
4475
4476 ARMCC::CondCodes CondCode, CondCode2;
4477 bool InvalidOnQNaN;
4478 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4479
4480 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4481 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4482 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4483 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4484 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4485 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4486 if (CondCode2 != ARMCC::AL) {
4487 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4488 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4489 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4490 }
4491 return Res;
4492}
4493
4494SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4495 SDValue Chain = Op.getOperand(0);
4496 SDValue Table = Op.getOperand(1);
4497 SDValue Index = Op.getOperand(2);
4498 SDLoc dl(Op);
4499
4500 EVT PTy = getPointerTy(DAG.getDataLayout());
4501 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4502 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4503 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4504 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4505 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Index);
4506 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4507 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump
4508 // table, which does another jump to the destination. This also makes it easier
4509 // to translate it to TBB / TBH later (Thumb2 only).
4510 // FIXME: This might not work if the function is extremely large.
4511 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4512 Addr, Op.getOperand(2), JTI);
4513 }
4514 if (isPositionIndependent() || Subtarget->isROPI()) {
4515 Addr =
4516 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4517 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4518 Chain = Addr.getValue(1);
4519 Addr = DAG.getNode(ISD::ADD, dl, PTy, Table, Addr);
4520 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4521 } else {
4522 Addr =
4523 DAG.getLoad(PTy, dl, Chain, Addr,
4524 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4525 Chain = Addr.getValue(1);
4526 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4527 }
4528}
4529
4530static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4531 EVT VT = Op.getValueType();
4532 SDLoc dl(Op);
4533
4534 if (Op.getValueType().getVectorElementType() == MVT::i32) {
4535 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4536 return Op;
4537 return DAG.UnrollVectorOp(Op.getNode());
4538 }
4539
4540 assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
4541 "Invalid type for custom lowering!");
4542 if (VT != MVT::v4i16)
4543 return DAG.UnrollVectorOp(Op.getNode());
4544
4545 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
4546 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4547}
4548
4549SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4550 EVT VT = Op.getValueType();
4551 if (VT.isVector())
4552 return LowerVectorFP_TO_INT(Op, DAG);
4553 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4554 RTLIB::Libcall LC;
4555 if (Op.getOpcode() == ISD::FP_TO_SINT)
4556 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4557 Op.getValueType());
4558 else
4559 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4560 Op.getValueType());
4561 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4562 /*isSigned*/ false, SDLoc(Op)).first;
4563 }
4564
4565 return Op;
4566}
4567
4568static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4569 EVT VT = Op.getValueType();
4570 SDLoc dl(Op);
4571
4572 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4573 if (VT.getVectorElementType() == MVT::f32)
4574 return Op;
4575 return DAG.UnrollVectorOp(Op.getNode());
4576 }
4577
4578 assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
4579 "Invalid type for custom lowering!");
4580 if (VT != MVT::v4f32)
4581 return DAG.UnrollVectorOp(Op.getNode());
4582
4583 unsigned CastOpc;
4584 unsigned Opc;
4585 switch (Op.getOpcode()) {
4586 default: llvm_unreachable("Invalid opcode!");
4587 case ISD::SINT_TO_FP:
4588 CastOpc = ISD::SIGN_EXTEND;
4589 Opc = ISD::SINT_TO_FP;
4590 break;
4591 case ISD::UINT_TO_FP:
4592 CastOpc = ISD::ZERO_EXTEND;
4593 Opc = ISD::UINT_TO_FP;
4594 break;
4595 }
4596
4597 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
4598 return DAG.getNode(Opc, dl, VT, Op);
4599}
4600
4601SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4602 EVT VT = Op.getValueType();
4603 if (VT.isVector())
4604 return LowerVectorINT_TO_FP(Op, DAG);
4605 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4606 RTLIB::Libcall LC;
4607 if (Op.getOpcode() == ISD::SINT_TO_FP)
4608 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4609 Op.getValueType());
4610 else
4611 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4612 Op.getValueType());
4613 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4614 /*isSigned*/ false, SDLoc(Op)).first;
4615 }
4616
4617 return Op;
4618}
4619
4620SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4621 // Implement fcopysign with a fabs and a conditional fneg.
4622 SDValue Tmp0 = Op.getOperand(0);
4623 SDValue Tmp1 = Op.getOperand(1);
4624 SDLoc dl(Op);
4625 EVT VT = Op.getValueType();
4626 EVT SrcVT = Tmp1.getValueType();
4627 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4628 Tmp0.getOpcode() == ARMISD::VMOVDRR;
4629 bool UseNEON = !InGPR && Subtarget->hasNEON();
4630
4631 if (UseNEON) {
4632 // Use VBSL to copy the sign bit.
4633 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4634 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4635 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4636 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4637 if (VT == MVT::f64)
4638 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4639 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4640 DAG.getConstant(32, dl, MVT::i32));
4641 else /*if (VT == MVT::f32)*/
4642 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4643 if (SrcVT == MVT::f32) {
4644 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4645 if (VT == MVT::f64)
4646 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4647 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4648 DAG.getConstant(32, dl, MVT::i32));
4649 } else if (VT == MVT::f32)
4650 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4651 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4652 DAG.getConstant(32, dl, MVT::i32));
4653 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4654 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4655
4656 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4657 dl, MVT::i32);
4658 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4659 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4660 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4661
4662 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4663 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4664 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4665 if (VT == MVT::f32) {
4666 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4667 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4668 DAG.getConstant(0, dl, MVT::i32));
4669 } else {
4670 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4671 }
4672
4673 return Res;
4674 }
4675
4676 // Bitcast operand 1 to i32.
4677 if (SrcVT == MVT::f64)
4678 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4679 Tmp1).getValue(1);
4680 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4681
4682 // Or in the signbit with integer operations.
4683 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4684 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4685 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4686 if (VT == MVT::f32) {
4687 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4688 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4689 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4690 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4691 }
4692
4693 // f64: Or the high part with signbit and then combine two parts.
4694 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4695 Tmp0);
4696 SDValue Lo = Tmp0.getValue(0);
4697 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4698 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4699 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4700}
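
A minimal scalar sketch of the non-NEON f32 path above, assuming the same masks (0x7fffffff for the magnitude, 0x80000000 for the sign bit); the helper name is illustrative:

  #include <cstdint>
  #include <cstring>

  // Illustrative sketch only, not part of ARMISelLowering.cpp: copysign for f32
  // done with integer operations, mirroring the masks used by the lowering.
  float copySignF32(float mag, float sgn) {
    uint32_t m, s;
    std::memcpy(&m, &mag, sizeof m);
    std::memcpy(&s, &sgn, sizeof s);
    uint32_t bits = (m & 0x7fffffffu) | (s & 0x80000000u);
    float out;
    std::memcpy(&out, &bits, sizeof out);
    return out;
  }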
4701
4702SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4703 MachineFunction &MF = DAG.getMachineFunction();
4704 MachineFrameInfo &MFI = MF.getFrameInfo();
4705 MFI.setReturnAddressIsTaken(true);
4706
4707 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4708 return SDValue();
4709
4710 EVT VT = Op.getValueType();
4711 SDLoc dl(Op);
4712 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4713 if (Depth) {
4714 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4715 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4716 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4717 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4718 MachinePointerInfo());
4719 }
4720
4721 // Return LR, which contains the return address. Mark it an implicit live-in.
4722 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4723 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4724}
4725
4726SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4727 const ARMBaseRegisterInfo &ARI =
4728 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4729 MachineFunction &MF = DAG.getMachineFunction();
4730 MachineFrameInfo &MFI = MF.getFrameInfo();
4731 MFI.setFrameAddressIsTaken(true);
4732
4733 EVT VT = Op.getValueType();
4734 SDLoc dl(Op); // FIXME probably not meaningful
4735 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4736 unsigned FrameReg = ARI.getFrameRegister(MF);
4737 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4738 while (Depth--)
4739 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4740 MachinePointerInfo());
4741 return FrameAddr;
4742}
4743
4744// FIXME? Maybe this could be a TableGen attribute on some registers and
4745// this table could be generated automatically from RegInfo.
4746unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4747 SelectionDAG &DAG) const {
4748 unsigned Reg = StringSwitch<unsigned>(RegName)
4749 .Case("sp", ARM::SP)
4750 .Default(0);
4751 if (Reg)
4752 return Reg;
4753 report_fatal_error(Twine("Invalid register name \""
4754 + StringRef(RegName) + "\"."));
4755}
4756
4757// The result is a 64-bit value, so split it into two 32-bit values and return
4758// them as a pair of values.
4759static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4760 SelectionDAG &DAG) {
4761 SDLoc DL(N);
4762
4763 // This function is only supposed to be called for i64 type destination.
4764 assert(N->getValueType(0) == MVT::i64
4765 && "ExpandREAD_REGISTER called for non-i64 type result.");
4766
4767 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4768 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4769 N->getOperand(0),
4770 N->getOperand(1));
4771
4772 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4773 Read.getValue(1)));
4774 Results.push_back(Read.getOperand(0));
4775}
4776
4777/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
4778/// When \p DstVT, the destination type of \p BC, is on the vector
4779/// register bank and the source of bitcast, \p Op, operates on the same bank,
4780/// it might be possible to combine them, such that everything stays on the
4781/// vector register bank.
4782/// \return The node that would replace \p BC, if the combine
4783/// is possible.
4784static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
4785 SelectionDAG &DAG) {
4786 SDValue Op = BC->getOperand(0);
4787 EVT DstVT = BC->getValueType(0);
4788
4789 // The only vector instruction that can produce a scalar (remember,
4790 // since the bitcast was about to be turned into VMOVDRR, the source
4791 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
4792 // Moreover, we can do this combine only if there is one use.
4793 // Finally, if the destination type is not a vector, there is not
4794 // much point in forcing everything onto the vector bank.
4795 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4796 !Op.hasOneUse())
4797 return SDValue();
4798
4799 // If the index is not constant, we will introduce an additional
4800 // multiply that will stick.
4801 // Give up in that case.
4802 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4803 if (!Index)
4804 return SDValue();
4805 unsigned DstNumElt = DstVT.getVectorNumElements();
4806
4807 // Compute the new index.
4808 const APInt &APIntIndex = Index->getAPIntValue();
4809 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
4810 NewIndex *= APIntIndex;
4811 // Check if the new constant index fits into i32.
4812 if (NewIndex.getBitWidth() > 32)
4813 return SDValue();
4814
4815 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
4816 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
4817 SDLoc dl(Op);
4818 SDValue ExtractSrc = Op.getOperand(0);
4819 EVT VecVT = EVT::getVectorVT(
4820 *DAG.getContext(), DstVT.getScalarType(),
4821 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
4822 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
4823 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
4824 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
4825}
4826
4827/// ExpandBITCAST - If the target supports VFP, this function is called to
4828/// expand a bit convert where either the source or destination type is i64 to
4829/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4830/// operand type is illegal (e.g., v2f32 for a target that doesn't support
4831/// vectors), since the legalizer won't know what to do with that.
4832static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4833 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4834 SDLoc dl(N);
4835 SDValue Op = N->getOperand(0);
4836
4837 // This function is only supposed to be called for i64 types, either as the
4838 // source or destination of the bit convert.
4839 EVT SrcVT = Op.getValueType();
4840 EVT DstVT = N->getValueType(0);
4841 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4842 "ExpandBITCAST called for non-i64 type");
4843
4844 // Turn i64->f64 into VMOVDRR.
4845 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4846 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
4847 // if we can combine the bitcast with its source.
4848 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
4849 return Val;
4850
4851 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4852 DAG.getConstant(0, dl, MVT::i32));
4853 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4854 DAG.getConstant(1, dl, MVT::i32));
4855 return DAG.getNode(ISD::BITCAST, dl, DstVT,
4856 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4857 }
4858
4859 // Turn f64->i64 into VMOVRRD.
4860 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4861 SDValue Cvt;
4862 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4863 SrcVT.getVectorNumElements() > 1)
4864 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4865 DAG.getVTList(MVT::i32, MVT::i32),
4866 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4867 else
4868 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4869 DAG.getVTList(MVT::i32, MVT::i32), Op);
4870 // Merge the pieces into a single i64 value.
4871 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4872 }
4873
4874 return SDValue();
4875}
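
The EXTRACT_ELEMENT pair feeding VMOVDRR corresponds to the usual low/high word split of an i64; a minimal sketch with illustrative helper names:

  #include <cstdint>

  // Illustrative sketch only, not part of ARMISelLowering.cpp: the two i32
  // halves extracted from an i64 before VMOVDRR recombines them into a single
  // f64 register pair.
  uint32_t lowHalf(uint64_t v)  { return uint32_t(v); }
  uint32_t highHalf(uint64_t v) { return uint32_t(v >> 32); }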
4876
4877/// getZeroVector - Returns a vector of specified type with all zero elements.
4878/// Zero vectors are used to represent vector negation and in those cases
4879/// will be implemented with the NEON VNEG instruction. However, VNEG does
4880/// not support i64 elements, so sometimes the zero vectors will need to be
4881/// explicitly constructed. Regardless, use a canonical VMOV to create the
4882/// zero vector.
4883static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4884 assert(VT.isVector() && "Expected a vector type");
4885 // The canonical modified immediate encoding of a zero vector is....0!
4886 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4887 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4888 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4889 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4890}
4891
4892/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
4893/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4894SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4895 SelectionDAG &DAG) const {
4896 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4897 EVT VT = Op.getValueType();
4898 unsigned VTBits = VT.getSizeInBits();
4899 SDLoc dl(Op);
4900 SDValue ShOpLo = Op.getOperand(0);
4901 SDValue ShOpHi = Op.getOperand(1);
4902 SDValue ShAmt = Op.getOperand(2);
4903 SDValue ARMcc;
4904 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4905 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4906
4907 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4908
4909 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4910 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4911 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4912 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4913 DAG.getConstant(VTBits, dl, MVT::i32));
4914 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4915 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4916 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4917 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4918 ISD::SETGE, ARMcc, DAG, dl);
4919 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
4920 ARMcc, CCR, CmpLo);
4921
4922 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4923 SDValue HiBigShift = Opc == ISD::SRA
4924 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4925 DAG.getConstant(VTBits - 1, dl, VT))
4926 : DAG.getConstant(0, dl, VT);
4927 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4928 ISD::SETGE, ARMcc, DAG, dl);
4929 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4930 ARMcc, CCR, CmpHi);
4931
4932 SDValue Ops[2] = { Lo, Hi };
4933 return DAG.getMergeValues(Ops, dl);
4934}
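
A minimal scalar sketch of the 64-bit logical right shift being lowered here (SRA_PARTS differs only in how the high word is filled), assuming a shift amount in [0, 63]; the helper name is illustrative:

  #include <cstdint>

  // Illustrative sketch only, not part of ARMISelLowering.cpp: SRL on two i32
  // halves. For amounts below 32 the low word mixes bits of both halves; for
  // larger amounts it takes the shifted high word, and the high word becomes 0.
  void lshr64Parts(uint32_t lo, uint32_t hi, unsigned amt,
                   uint32_t &outLo, uint32_t &outHi) {
    if (amt == 0) { outLo = lo; outHi = hi; return; }
    if (amt < 32) {
      outLo = (lo >> amt) | (hi << (32 - amt));
      outHi = hi >> amt;
    } else {
      outLo = hi >> (amt - 32);
      outHi = 0;
    }
  }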
4935
4936/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4937/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4938SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4939 SelectionDAG &DAG) const {
4940 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4941 EVT VT = Op.getValueType();
4942 unsigned VTBits = VT.getSizeInBits();
4943 SDLoc dl(Op);
4944 SDValue ShOpLo = Op.getOperand(0);
4945 SDValue ShOpHi = Op.getOperand(1);
4946 SDValue ShAmt = Op.getOperand(2);
4947 SDValue ARMcc;
4948 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4949
4950 assert(Op.getOpcode() == ISD::SHL_PARTS);
4951 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4952 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4953 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4954 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4955 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4956
4957 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4958 DAG.getConstant(VTBits, dl, MVT::i32));
4959 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4960 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4961 ISD::SETGE, ARMcc, DAG, dl);
4962 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4963 ARMcc, CCR, CmpHi);
4964
4965 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4966 ISD::SETGE, ARMcc, DAG, dl);
4967 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4968 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
4969 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
4970
4971 SDValue Ops[2] = { Lo, Hi };
4972 return DAG.getMergeValues(Ops, dl);
4973}
4974
4975SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4976 SelectionDAG &DAG) const {
4977 // The rounding mode is in bits 23:22 of the FPSCR.
4978 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4979 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
4980 // so that the shift and the AND get folded into a bitfield extract.
4981 SDLoc dl(Op);
4982 SDValue Ops[] = { DAG.getEntryNode(),
4983 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
4984
4985 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
4986 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4987 DAG.getConstant(1U << 22, dl, MVT::i32));
4988 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4989 DAG.getConstant(22, dl, MVT::i32));
4990 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4991 DAG.getConstant(3, dl, MVT::i32));
4992}
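// A minimal standalone sketch (illustrative only, not part of this file) of
// the same mapping using plain scalar arithmetic; the function name is purely
// hypothetical. For rounding-mode bits RM in FPSCR[23:22] it yields
// RM=0 -> 1, 1 -> 2, 2 -> 3, 3 -> 0, matching the table in the comment above.
static inline unsigned mapFPSCRToFltRounds(unsigned FPSCR) {
  return ((FPSCR + (1u << 22)) >> 22) & 3u;
}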
4993
4994static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4995 const ARMSubtarget *ST) {
4996 SDLoc dl(N);
4997 EVT VT = N->getValueType(0);
4998 if (VT.isVector()) {
4999    assert(ST->hasNEON());
5000
5001 // Compute the least significant set bit: LSB = X & -X
5002 SDValue X = N->getOperand(0);
5003 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
5004 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
5005
5006 EVT ElemTy = VT.getVectorElementType();
5007
5008 if (ElemTy == MVT::i8) {
5009 // Compute with: cttz(x) = ctpop(lsb - 1)
5010 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5011 DAG.getTargetConstant(1, dl, ElemTy));
5012 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5013 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
5014 }
5015
5016 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
5017 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
5018 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
5019 unsigned NumBits = ElemTy.getSizeInBits();
5020 SDValue WidthMinus1 =
5021 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5022 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5023 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5024 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5025 }
5026
5027 // Compute with: cttz(x) = ctpop(lsb - 1)
5028
5029 // Since we can only compute the number of bits in a byte with vcnt.8, we
5030 // have to gather the result with pairwise addition (vpaddl) for i16, i32,
5031 // and i64.
5032
5033 // Compute LSB - 1.
5034 SDValue Bits;
5035 if (ElemTy == MVT::i64) {
5036 // Load constant 0xffff'ffff'ffff'ffff to register.
5037 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5038 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5039 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5040 } else {
5041 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5042 DAG.getTargetConstant(1, dl, ElemTy));
5043 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5044 }
5045
5046 // Count #bits with vcnt.8.
5047 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5048 SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
5049 SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
5050
5051 // Gather the #bits with vpaddl (pairwise add.)
5052 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5053 SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
5054 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5055 Cnt8);
5056 if (ElemTy == MVT::i16)
5057 return Cnt16;
5058
5059 EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
5060 SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
5061 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5062 Cnt16);
5063 if (ElemTy == MVT::i32)
5064 return Cnt32;
5065
5066    assert(ElemTy == MVT::i64);
5067 SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5068 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5069 Cnt32);
5070 return Cnt64;
5071 }
5072
5073 if (!ST->hasV6T2Ops())
5074 return SDValue();
5075
5076 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5077 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5078}
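// Scalar sketch (illustrative only) of the cttz(x) = ctpop(lsb - 1) identity
// used in the vector path above, for an 8-bit value; __builtin_popcount is
// assumed to be available (GCC/Clang). E.g. X = 0b00011000 -> LSB = 0b1000,
// LSB - 1 = 0b0111, popcount = 3 = cttz(X); X = 0 yields 8, the type width.
static inline unsigned cttz8ViaPopcount(unsigned X) {
  unsigned LSB = X & (0u - X);                  // isolate least significant set bit
  return __builtin_popcount((LSB - 1) & 0xFFu); // number of trailing zeros
}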
5079
5080/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
5081/// for each 16-bit element from operand, repeated. The basic idea is to
5082/// leverage vcnt to get the 8-bit counts, gather and add the results.
5083///
5084/// Trace for v4i16:
5085/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5086/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
5087/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
5088/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
5089/// [b0 b1 b2 b3 b4 b5 b6 b7]
5090/// +[b1 b0 b3 b2 b5 b4 b7 b6]
5091/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
5092/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
5093static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
5094 EVT VT = N->getValueType(0);
5095 SDLoc DL(N);
5096
5097 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5098 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
5099 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
5100 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
5101 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
5102 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
5103}
5104
5105/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
5106/// bit-count for each 16-bit element from the operand. We need slightly
5107/// different sequencing for v4i16 and v8i16 to stay within NEON's available
5108/// 64/128-bit registers.
5109///
5110/// Trace for v4i16:
5111/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5112/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
5113/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
5114/// v4i16:Extracted = [k0 k1 k2 k3 ]
5115static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
5116 EVT VT = N->getValueType(0);
5117 SDLoc DL(N);
5118
5119 SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
5120 if (VT.is64BitVector()) {
5121 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
5122 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
5123 DAG.getIntPtrConstant(0, DL));
5124 } else {
5125 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
5126 BitCounts, DAG.getIntPtrConstant(0, DL));
5127 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
5128 }
5129}
5130
5131/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
5132/// bit-count for each 32-bit element from the operand. The idea here is
5133/// to split the vector into 16-bit elements, leverage the 16-bit count
5134/// routine, and then combine the results.
5135///
5136/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
5137/// input = [v0 v1 ] (vi: 32-bit elements)
5138/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
5139/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
5140/// vrev: N0 = [k1 k0 k3 k2 ]
5141/// [k0 k1 k2 k3 ]
5142/// N1 =+[k1 k0 k3 k2 ]
5143/// [k0 k2 k1 k3 ]
5144/// N2 =+[k1 k3 k0 k2 ]
5145/// [k0 k2 k1 k3 ]
5146/// Extended =+[k1 k3 k0 k2 ]
5147/// [k0 k2 ]
5148/// Extracted=+[k1 k3 ]
5149///
5150static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
5151 EVT VT = N->getValueType(0);
5152 SDLoc DL(N);
5153
5154 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5155
5156 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
5157 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
5158 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
5159 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
5160 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
5161
5162 if (VT.is64BitVector()) {
5163 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
5164 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
5165 DAG.getIntPtrConstant(0, DL));
5166 } else {
5167 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
5168 DAG.getIntPtrConstant(0, DL));
5169 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
5170 }
5171}
5172
5173static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5174 const ARMSubtarget *ST) {
5175 EVT VT = N->getValueType(0);
5176
5177 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.")(static_cast <bool> (ST->hasNEON() && "Custom ctpop lowering requires NEON."
) ? void (0) : __assert_fail ("ST->hasNEON() && \"Custom ctpop lowering requires NEON.\""
, "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5177, __extension__ __PRETTY_FUNCTION__))
;
5178 assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||(static_cast <bool> ((VT == MVT::v2i32 || VT == MVT::v4i32
|| VT == MVT::v4i16 || VT == MVT::v8i16) && "Unexpected type for custom ctpop lowering"
) ? void (0) : __assert_fail ("(VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) && \"Unexpected type for custom ctpop lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5180, __extension__ __PRETTY_FUNCTION__))
5179 VT == MVT::v4i16 || VT == MVT::v8i16) &&(static_cast <bool> ((VT == MVT::v2i32 || VT == MVT::v4i32
|| VT == MVT::v4i16 || VT == MVT::v8i16) && "Unexpected type for custom ctpop lowering"
) ? void (0) : __assert_fail ("(VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) && \"Unexpected type for custom ctpop lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5180, __extension__ __PRETTY_FUNCTION__))
5180 "Unexpected type for custom ctpop lowering")(static_cast <bool> ((VT == MVT::v2i32 || VT == MVT::v4i32
|| VT == MVT::v4i16 || VT == MVT::v8i16) && "Unexpected type for custom ctpop lowering"
) ? void (0) : __assert_fail ("(VT == MVT::v2i32 || VT == MVT::v4i32 || VT == MVT::v4i16 || VT == MVT::v8i16) && \"Unexpected type for custom ctpop lowering\""
, "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5180, __extension__ __PRETTY_FUNCTION__))
;
5181
5182 if (VT.getVectorElementType() == MVT::i32)
5183 return lowerCTPOP32BitElements(N, DAG);
5184 else
5185 return lowerCTPOP16BitElements(N, DAG);
5186}
5187
5188static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5189 const ARMSubtarget *ST) {
5190 EVT VT = N->getValueType(0);
5191 SDLoc dl(N);
5192
5193 if (!VT.isVector())
5194 return SDValue();
5195
5196 // Lower vector shifts on NEON to use VSHL.
5197 assert(ST->hasNEON() && "unexpected vector shift")(static_cast <bool> (ST->hasNEON() && "unexpected vector shift"
) ? void (0) : __assert_fail ("ST->hasNEON() && \"unexpected vector shift\""
, "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5197, __extension__ __PRETTY_FUNCTION__))
;
5198
5199 // Left shifts translate directly to the vshiftu intrinsic.
5200 if (N->getOpcode() == ISD::SHL)
5201 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5202 DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5203 MVT::i32),
5204 N->getOperand(0), N->getOperand(1));
5205
5206  assert((N->getOpcode() == ISD::SRA ||
5207          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5208
5209 // NEON uses the same intrinsics for both left and right shifts. For
5210 // right shifts, the shift amounts are negative, so negate the vector of
5211 // shift amounts.
5212 EVT ShiftVT = N->getOperand(1).getValueType();
5213 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5214 getZeroVector(ShiftVT, DAG, dl),
5215 N->getOperand(1));
5216 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5217 Intrinsic::arm_neon_vshifts :
5218 Intrinsic::arm_neon_vshiftu);
5219 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5220 DAG.getConstant(vshiftInt, dl, MVT::i32),
5221 N->getOperand(0), NegatedCount);
5222}
5223
5224static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5225 const ARMSubtarget *ST) {
5226 EVT VT = N->getValueType(0);
5227 SDLoc dl(N);
5228
5229 // We can get here for a node like i32 = ISD::SHL i32, i64
5230 if (VT != MVT::i64)
5231 return SDValue();
5232
5233  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5234         "Unknown shift to lower!");
5235
5236 // We only lower SRA, SRL of 1 here, all others use generic lowering.
5237 if (!isOneConstant(N->getOperand(1)))
5238 return SDValue();
5239
5240 // If we are in Thumb1 mode, we don't have RRX.
5241 if (ST->isThumb1Only()) return SDValue();
5242
5243 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5244 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5245 DAG.getConstant(0, dl, MVT::i32));
5246 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5247 DAG.getConstant(1, dl, MVT::i32));
5248
5249 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5250 // captures the result into a carry flag.
5251 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5252 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5253
5254 // The low part is an ARMISD::RRX operand, which shifts the carry in.
5255 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5256
5257 // Merge the pieces into a single i64 value.
5258 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5259}
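// Hand trace (illustrative only) of the RRX expansion above for an i64
// logical shift right by one, with the value split as (Hi:Lo):
//   SRL_FLAG: Hi' = Hi >> 1, and bit 0 of Hi is captured in the carry flag
//   RRX:      Lo' = (carry << 31) | (Lo >> 1)
// so bit 0 of Hi correctly moves into bit 31 of Lo, i.e. (Hi':Lo') is
// (Hi:Lo) >> 1. For SRA the top-half shift is an arithmetic one instead.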
5260
5261static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5262 SDValue TmpOp0, TmpOp1;
5263 bool Invert = false;
5264 bool Swap = false;
5265 unsigned Opc = 0;
5266
5267 SDValue Op0 = Op.getOperand(0);
5268 SDValue Op1 = Op.getOperand(1);
5269 SDValue CC = Op.getOperand(2);
5270 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5271 EVT VT = Op.getValueType();
5272 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5273 SDLoc dl(Op);
5274
5275 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5276 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5277 // Special-case integer 64-bit equality comparisons. They aren't legal,
5278 // but they can be lowered with a few vector instructions.
5279 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5280 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5281 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5282 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5283 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5284 DAG.getCondCode(ISD::SETEQ));
5285 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5286 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5287 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5288 if (SetCCOpcode == ISD::SETNE)
5289 Merged = DAG.getNOT(dl, Merged, CmpVT);
5290 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5291 return Merged;
5292 }
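  // Worked example (illustrative only): if two i64 lanes differ only in their
  // low 32 bits, the i32 SETCC yields all-ones for the matching half and
  // all-zeros for the differing half; VREV64 swaps the two i32 halves within
  // each 64-bit lane, so the AND leaves all-zeros and the lanes are correctly
  // reported as not equal. For SETNE the result is simply inverted.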
5293
5294 if (CmpVT.getVectorElementType() == MVT::i64)
5295 // 64-bit comparisons are not legal in general.
5296 return SDValue();
5297
5298 if (Op1.getValueType().isFloatingPoint()) {
5299 switch (SetCCOpcode) {
5300 default: llvm_unreachable("Illegal FP comparison")::llvm::llvm_unreachable_internal("Illegal FP comparison", "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5300)
;
5301 case ISD::SETUNE:
5302 case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5303 case ISD::SETOEQ:
5304 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5305 case ISD::SETOLT:
5306 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5307 case ISD::SETOGT:
5308 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5309 case ISD::SETOLE:
5310 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5311 case ISD::SETOGE:
5312 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5313 case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5314 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5315 case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5316 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5317 case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5318 case ISD::SETONE:
5319 // Expand this to (OLT | OGT).
5320 TmpOp0 = Op0;
5321 TmpOp1 = Op1;
5322 Opc = ISD::OR;
5323 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5324 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5325 break;
5326 case ISD::SETUO:
5327 Invert = true;
5328      LLVM_FALLTHROUGH;
5329 case ISD::SETO:
5330 // Expand this to (OLT | OGE).
5331 TmpOp0 = Op0;
5332 TmpOp1 = Op1;
5333 Opc = ISD::OR;
5334 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5335 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5336 break;
5337 }
5338 } else {
5339 // Integer comparisons.
5340 switch (SetCCOpcode) {
5341 default: llvm_unreachable("Illegal integer comparison")::llvm::llvm_unreachable_internal("Illegal integer comparison"
, "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5341)
;
5342 case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5343 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5344 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5345 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5346 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5347 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5348 case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
5349 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5350 case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
5351 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5352 }
5353
5354 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5355 if (Opc == ARMISD::VCEQ) {
5356 SDValue AndOp;
5357 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5358 AndOp = Op0;
5359 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5360 AndOp = Op1;
5361
5362 // Ignore bitconvert.
5363 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5364 AndOp = AndOp.getOperand(0);
5365
5366 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5367 Opc = ARMISD::VTST;
5368 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5369 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5370 Invert = !Invert;
5371 }
5372 }
5373 }
5374
5375 if (Swap)
5376 std::swap(Op0, Op1);
5377
5378 // If one of the operands is a constant vector zero, attempt to fold the
5379 // comparison to a specialized compare-against-zero form.
5380 SDValue SingleOp;
5381 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5382 SingleOp = Op0;
5383 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5384 if (Opc == ARMISD::VCGE)
5385 Opc = ARMISD::VCLEZ;
5386 else if (Opc == ARMISD::VCGT)
5387 Opc = ARMISD::VCLTZ;
5388 SingleOp = Op1;
5389 }
5390
5391 SDValue Result;
5392 if (SingleOp.getNode()) {
5393 switch (Opc) {
5394 case ARMISD::VCEQ:
5395 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5396 case ARMISD::VCGE:
5397 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5398 case ARMISD::VCLEZ:
5399 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5400 case ARMISD::VCGT:
5401 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5402 case ARMISD::VCLTZ:
5403 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5404 default:
5405 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5406 }
5407 } else {
5408 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5409 }
5410
5411 Result = DAG.getSExtOrTrunc(Result, dl, VT);
5412
5413 if (Invert)
5414 Result = DAG.getNOT(dl, Result, VT);
5415
5416 return Result;
5417}
5418
5419static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
5420 SDValue LHS = Op.getOperand(0);
5421 SDValue RHS = Op.getOperand(1);
5422 SDValue Carry = Op.getOperand(2);
5423 SDValue Cond = Op.getOperand(3);
5424 SDLoc DL(Op);
5425
5426  assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
5427
5428  assert(Carry.getOpcode() != ISD::CARRY_FALSE);
5429 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5430 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5431
5432 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5433 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5434 SDValue ARMcc = DAG.getConstant(
5435 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5436 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5437 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5438 Cmp.getValue(1), SDValue());
5439 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5440 CCR, Chain.getValue(1));
5441}
5442
5443/// isNEONModifiedImm - Check if the specified splat value corresponds to a
5444/// valid vector constant for a NEON instruction with a "modified immediate"
5445/// operand (e.g., VMOV). If so, return the encoded value.
5446static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5447 unsigned SplatBitSize, SelectionDAG &DAG,
5448 const SDLoc &dl, EVT &VT, bool is128Bits,
5449 NEONModImmType type) {
5450 unsigned OpCmode, Imm;
5451
5452 // SplatBitSize is set to the smallest size that splats the vector, so a
5453 // zero vector will always have SplatBitSize == 8. However, NEON modified
5454 // immediate instructions other than VMOV do not support the 8-bit encoding
5455 // of a zero vector, and the default encoding of zero is supposed to be the
5456 // 32-bit version.
5457 if (SplatBits == 0)
5458 SplatBitSize = 32;
5459
5460 switch (SplatBitSize) {
5461 case 8:
5462 if (type != VMOVModImm)
5463 return SDValue();
5464 // Any 1-byte value is OK. Op=0, Cmode=1110.
5465    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5466 OpCmode = 0xe;
5467 Imm = SplatBits;
5468 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5469 break;
5470
5471 case 16:
5472 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5473 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5474 if ((SplatBits & ~0xff) == 0) {
5475 // Value = 0x00nn: Op=x, Cmode=100x.
5476 OpCmode = 0x8;
5477 Imm = SplatBits;
5478 break;
5479 }
5480 if ((SplatBits & ~0xff00) == 0) {
5481 // Value = 0xnn00: Op=x, Cmode=101x.
5482 OpCmode = 0xa;
5483 Imm = SplatBits >> 8;
5484 break;
5485 }
5486 return SDValue();
5487
5488 case 32:
5489 // NEON's 32-bit VMOV supports splat values where:
5490 // * only one byte is nonzero, or
5491 // * the least significant byte is 0xff and the second byte is nonzero, or
5492 // * the least significant 2 bytes are 0xff and the third is nonzero.
5493 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5494 if ((SplatBits & ~0xff) == 0) {
5495 // Value = 0x000000nn: Op=x, Cmode=000x.
5496 OpCmode = 0;
5497 Imm = SplatBits;
5498 break;
5499 }
5500 if ((SplatBits & ~0xff00) == 0) {
5501 // Value = 0x0000nn00: Op=x, Cmode=001x.
5502 OpCmode = 0x2;
5503 Imm = SplatBits >> 8;
5504 break;
5505 }
5506 if ((SplatBits & ~0xff0000) == 0) {
5507 // Value = 0x00nn0000: Op=x, Cmode=010x.
5508 OpCmode = 0x4;
5509 Imm = SplatBits >> 16;
5510 break;
5511 }
5512 if ((SplatBits & ~0xff000000) == 0) {
5513 // Value = 0xnn000000: Op=x, Cmode=011x.
5514 OpCmode = 0x6;
5515 Imm = SplatBits >> 24;
5516 break;
5517 }
5518
5519 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5520 if (type == OtherModImm) return SDValue();
5521
5522 if ((SplatBits & ~0xffff) == 0 &&
5523 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5524 // Value = 0x0000nnff: Op=x, Cmode=1100.
5525 OpCmode = 0xc;
5526 Imm = SplatBits >> 8;
5527 break;
5528 }
5529
5530 if ((SplatBits & ~0xffffff) == 0 &&
5531 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5532 // Value = 0x00nnffff: Op=x, Cmode=1101.
5533 OpCmode = 0xd;
5534 Imm = SplatBits >> 16;
5535 break;
5536 }
5537
5538 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5539 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5540 // VMOV.I32. A (very) minor optimization would be to replicate the value
5541 // and fall through here to test for a valid 64-bit splat. But, then the
5542 // caller would also need to check and handle the change in size.
5543 return SDValue();
5544
5545 case 64: {
5546 if (type != VMOVModImm)
5547 return SDValue();
5548 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5549 uint64_t BitMask = 0xff;
5550 uint64_t Val = 0;
5551 unsigned ImmMask = 1;
5552 Imm = 0;
5553 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5554 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5555 Val |= BitMask;
5556 Imm |= ImmMask;
5557 } else if ((SplatBits & BitMask) != 0) {
5558 return SDValue();
5559 }
5560 BitMask <<= 8;
5561 ImmMask <<= 1;
5562 }
5563
5564 if (DAG.getDataLayout().isBigEndian())
5565 // swap higher and lower 32 bit word
5566 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5567
5568 // Op=1, Cmode=1110.
5569 OpCmode = 0x1e;
5570 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5571 break;
5572 }
5573
5574 default:
5575 llvm_unreachable("unexpected size for isNEONModifiedImm")::llvm::llvm_unreachable_internal("unexpected size for isNEONModifiedImm"
, "/build/llvm-toolchain-snapshot-6.0~svn318693/lib/Target/ARM/ARMISelLowering.cpp"
, 5575)
;
5576 }
5577
5578 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5579 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5580}
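// Worked example (illustrative only): a 64-bit splat of 0x00ff00ff00ff00ff
// has every byte equal to either 0x00 or 0xff, so the byte loop above sets
// one Imm bit per 0xff byte (bytes 0, 2, 4, 6), giving Imm = 0b01010101 =
// 0x55 with OpCmode = 0x1e; a 32-bit splat of the form 0x0000nn00 instead
// takes the Cmode=001x path with Imm = nn.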
5581
5582SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5583 const ARMSubtarget *ST) const {
5584 bool IsDouble = Op.getValueType() == MVT::f64;
5585 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5586 const APFloat &FPVal = CFP->getValueAPF();
5587
5588 // Prevent floating-point constants from using literal loads
5589 // when execute-only is enabled.
5590 if (ST->genExecuteOnly()) {
5591 APInt INTVal = FPVal.bitcastToAPInt();
5592 SDLoc DL(CFP);
5593 if (IsDouble) {
5594 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5595 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5596 if (!ST->isLittle())
5597 std::swap(Lo, Hi);
5598 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5599 } else {
5600 return DAG.getConstant(INTVal, DL, MVT::i32);
5601 }
5602 }
5603
5604 if (!ST->hasVFP3())
5605 return SDValue();
5606
5607 // Use the default (constant pool) lowering for double constants when we have
5608 // an SP-only FPU
5609 if (IsDouble && Subtarget->isFPOnlySP())
5610 return SDValue();
5611
5612 // Try splatting with a VMOV.f32...
5613 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5614
5615 if (ImmVal != -1) {
5616 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5617 // We have code in place to select a valid ConstantFP already, no need to
5618 // do any mangling.
5619 return Op;
5620 }
5621
5622 // It's a float and we are trying to use NEON operations where
5623 // possible. Lower it to a splat followed by an extract.
5624 SDLoc DL(Op);
5625 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5626 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5627 NewVal);
5628 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5629 DAG.getConstant(0, DL, MVT::i32));
5630 }
5631
5632 // The rest of our options are NEON only, make sure that's allowed before
5633 // proceeding.
5634 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5635 return SDValue();
5636
5637 EVT VMovVT;
5638 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5639
5640 // It wouldn't really be worth bothering for doubles except for one very
5641 // important value, which does happen to match: 0.0. So make sure we don't do
5642 // anything stupid.
5643 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5644 return SDValue();
5645
5646 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5647 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5648 VMovVT, false, VMOVModImm);
5649 if (NewVal != SDValue()) {
5650 SDLoc DL(Op);
5651 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5652 NewVal);
5653 if (IsDouble)
5654 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5655
5656 // It's a float: cast and extract a vector element.
5657 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5658 VecConstant);
5659 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5660 DAG.getConstant(0, DL, MVT::i32));
5661 }
5662
5663 // Finally, try a VMVN.i32
5664 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5665 false, VMVNModImm);
5666 if (NewVal != SDValue()) {
5667 SDLoc DL(Op);
5668 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5669
5670 if (IsDouble)
5671 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5672
5673 // It's a float: cast and extract a vector element.
5674 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5675 VecConstant);
5676 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5677 DAG.getConstant(0, DL, MVT::i32));
5678 }
5679
5680 return SDValue();
5681}
5682
5683// Check if a VEXT instruction can handle the shuffle mask when the
5684// vector sources of the shuffle are the same.
5685static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5686 unsigned NumElts = VT.getVectorNumElements();
5687
5688 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5689 if (M[0] < 0)
5690 return false;
5691
5692 Imm = M[0];
5693
5694 // If this is a VEXT shuffle, the immediate value is the index of the first
5695 // element. The other shuffle indices must be the successive elements after
5696 // the first one.
5697 unsigned ExpectedElt = Imm;
5698 for (unsigned i = 1; i < NumElts; ++i) {
5699 // Increment the expected index. If it wraps around, just follow it
5700 // back to index zero and keep going.
5701 ++ExpectedElt;
5702 if (ExpectedElt == NumElts)
5703 ExpectedElt = 0;
5704
5705 if (M[i] < 0) continue; // ignore UNDEF indices
5706 if (ExpectedElt != static_cast<unsigned>(M[i]))
5707 return false;
5708 }
5709
5710 return true;
5711}
5712
5713static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5714 bool &ReverseVEXT, unsigned &Imm) {
5715 unsigned NumElts = VT.getVectorNumElements();
5716 ReverseVEXT = false;
5717
5718 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5719 if (M[0] < 0)
5720 return false;
5721
5722 Imm = M[0];
5723
5724 // If this is a VEXT shuffle, the immediate value is the index of the first
5725 // element. The other shuffle indices must be the successive elements after
5726 // the first one.
5727 unsigned ExpectedElt = Imm;
5728 for (unsigned i = 1; i < NumElts; ++i) {
5729 // Increment the expected index. If it wraps around, it may still be
5730 // a VEXT but the source vectors must be swapped.
5731 ExpectedElt += 1;
5732 if (ExpectedElt == NumElts * 2) {
5733 ExpectedElt = 0;
5734 ReverseVEXT = true;
5735 }
5736
5737 if (M[i] < 0) continue; // ignore UNDEF indices
5738 if (ExpectedElt != static_cast<unsigned>(M[i]))
5739 return false;
5740 }
5741
5742 // Adjust the index value if the source operands will be swapped.
5743 if (ReverseVEXT)
5744 Imm -= NumElts;
5745
5746 return true;
5747}
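// Worked example (illustrative only) for v8i8: the mask
// <3, 4, 5, 6, 7, 8, 9, 10> starts at element 3 and walks forward through
// the concatenation of the two sources without wrapping past 2*NumElts, so
// it is VEXT(V1, V2, #3) with ReverseVEXT == false. If the walk does wrap,
// the sources must be swapped and Imm is reduced by NumElts.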
5748
5749/// isVREVMask - Check if a vector shuffle corresponds to a VREV
5750/// instruction with the specified blocksize. (The order of the elements
5751/// within each block of the vector is reversed.)
5752static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5753  assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5754         "Only possible block sizes for VREV are: 16, 32, 64");
5755
5756 unsigned EltSz = VT.getScalarSizeInBits();
5757 if (EltSz == 64)
5758 return false;
5759
5760 unsigned NumElts = VT.getVectorNumElements();
5761 unsigned BlockElts = M[0] + 1;
5762 // If the first shuffle index is UNDEF, be optimistic.
5763 if (M[0] < 0)
5764 BlockElts = BlockSize / EltSz;
5765
5766 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5767 return false;
5768
5769 for (unsigned i = 0; i < NumElts; ++i) {
5770 if (M[i] < 0) continue; // ignore UNDEF indices
5771 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5772 return false;
5773 }
5774
5775 return true;
5776}
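// Worked example (illustrative only): for v8i8 with BlockSize == 32,
// BlockElts = 4, and the mask <3, 2, 1, 0, 7, 6, 5, 4> reverses the four
// i8 elements inside each 32-bit block, so it is accepted as a VREV32.8 mask.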
5777
5778static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5779 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5780 // range, then 0 is placed into the resulting vector. So pretty much any mask
5781 // of 8 elements can work here.
5782 return VT == MVT::v8i8 && M.size() == 8;
5783}
5784
5785static unsigned SelectPairHalf(unsigned Elements, ArrayRef<int> Mask,
5786 unsigned Index) {
5787 if (Mask.size() == Elements * 2)
5788 return Index / Elements;
5789 return Mask[Index] == 0 ? 0 : 1;
5790}
5791
5792// Checks whether the shuffle mask represents a vector transpose (VTRN) by
5793// checking that pairs of elements in the shuffle mask represent the same index
5794// in each vector, incrementing the expected index by 2 at each step.
5795// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
5796// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
5797// v2={e,f,g,h}
5798// WhichResult gives the offset for each element in the mask based on which
5799// of the two results it belongs to.
5800//
5801// The transpose can be represented either as:
5802// result1 = shufflevector v1, v2, result1_shuffle_mask
5803// result2 = shufflevector v1, v2, result2_shuffle_mask
5804// where v1/v2 and the shuffle masks have the same number of elements
5805// (here WhichResult (see below) indicates which result is being checked)
5806//
5807// or as:
5808// results = shufflevector v1, v2, shuffle_mask
5809// where both results are returned in one vector and the shuffle mask has twice
5810// as many elements as v1/v2 (here WhichResult will always be 0 if true). In
5811// that case we check the low half and the high half of the shuffle mask as if
5812// each were a mask of the first form.
5813static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5814 unsigned EltSz = VT.getScalarSizeInBits();
16. Calling 'EVT::getScalarSizeInBits'
31. Returning from 'EVT::getScalarSizeInBits'
5815 if (EltSz == 64)
32. Assuming 'EltSz' is not equal to 64
33. Taking false branch
5816 return false;
5817
5818 unsigned NumElts = VT.getVectorNumElements();
34. Calling 'EVT::getVectorNumElements'
42. Returning from 'EVT::getVectorNumElements'
5819 if (M.size() != NumElts && M.size() != NumElts*2)
5820 return false;
5821
5822 // If the mask is twice as long as the input vector then we need to check the
5823 // upper and lower parts of the mask with a matching value for WhichResult
5824 // FIXME: A mask with only even values will be rejected in case the first
5825 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
5826 // M[0] is used to determine WhichResult
5827 for (unsigned i = 0; i < M.size(); i += NumElts) {
43. Assuming the condition is false
44. Loop condition is false. Execution continues on line 5836
5828 WhichResult = SelectPairHalf(NumElts, M, i);
5829 for (unsigned j = 0; j < NumElts; j += 2) {
5830 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5831 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
5832 return false;
5833 }
5834 }
5835
5836 if (M.size() == NumElts*2)
45. Taking false branch
5837 WhichResult = 0;
5838
5839 return true;
5840}
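// Worked example (illustrative only) for v4i32 with sources v1 = {a,b,c,d}
// and v2 = {e,f,g,h}: the mask [0, 4, 2, 6] matches with WhichResult == 0
// and yields {a,e,c,g}, while [1, 5, 3, 7] matches with WhichResult == 1 and
// yields {b,f,d,h}; together these are the two results of VTRN.32.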
5841
5842/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5843/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5844/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5845static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5846 unsigned EltSz = VT.getScalarSizeInBits();
5847 if (EltSz == 64)
5848 return false;
5849
5850 unsigned NumElts = VT.getVectorNumElements();
5851 if (M.size() != NumElts && M.size() != NumElts*2)
5852 return false;
5853
5854 for (unsigned i = 0; i < M.size(); i += NumElts) {
5855 WhichResult = SelectPairHalf(NumElts, M, i);
5856 for (unsigned j = 0; j < NumElts; j += 2) {
5857 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5858 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
5859 return false;
5860 }
5861 }
5862
5863 if (M.size() == NumElts*2)
5864 WhichResult = 0;
5865
5866 return true;
5867}
5868
5869// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
5870// that the mask elements are either all even and in steps of size 2 or all odd
5871// and in steps of size 2.
5872// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
5873// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
5874// v2={e,f,g,h}
5875// Requires similar checks to that of isVTRNMask with
5876// respect to how the results are returned.
5877static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5878 unsigned EltSz = VT.getScalarSizeInBits();
5879 if (EltSz == 64)
5880 return false;
5881
5882 unsigned NumElts = VT.getVectorNumElements();
5883 if (M.size() != NumElts && M.size() != NumElts*2)
5884 return false;
5885
5886 for (unsigned i = 0; i < M.size(); i += NumElts) {
5887 WhichResult = SelectPairHalf(NumElts, M, i);
5888 for (unsigned j = 0; j < NumElts; ++j) {
5889 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
5890 return false;
5891 }
5892 }
5893
5894 if (M.size() == NumElts*2)
5895 WhichResult = 0;
5896
5897 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5898 if (VT.is64BitVector() && EltSz == 32)
5899 return false;
5900
5901 return true;
5902}
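// Worked example (illustrative only) for v4i32 with v1 = {a,b,c,d} and
// v2 = {e,f,g,h}: [0, 2, 4, 6] (WhichResult == 0) selects the even elements
// {a,c,e,g} and [1, 3, 5, 7] (WhichResult == 1) the odd ones {b,d,f,h},
// i.e. the two results of VUZP.32.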
5903
5904/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5905/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5906/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5907static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5908 unsigned EltSz = VT.getScalarSizeInBits();
5909 if (EltSz == 64)
5910 return false;
5911
5912 unsigned NumElts = VT.getVectorNumElements();
5913 if (M.size() != NumElts && M.size() != NumElts*2)
5914 return false;
5915
5916 unsigned Half = NumElts / 2;
5917 for (unsigned i = 0; i < M.size(); i += NumElts) {
5918 WhichResult = SelectPairHalf(NumElts, M, i);
5919 for (unsigned j = 0; j < NumElts; j += Half) {
5920 unsigned Idx = WhichResult;
5921 for (unsigned k = 0; k < Half; ++k) {
5922 int MIdx = M[i + j + k];
5923 if (MIdx >= 0 && (unsigned) MIdx != Idx)
5924 return false;
5925 Idx += 2;
5926 }
5927 }
5928 }
5929
5930 if (M.size() == NumElts*2)
5931 WhichResult = 0;
5932
5933 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5934 if (VT.is64BitVector() && EltSz == 32)
5935 return false;
5936
5937 return true;
5938}
5939
5940// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
5941// that pairs of elements of the shufflemask represent the same index in each
5942// vector incrementing sequentially through the vectors.
5943// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
5944// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
5945// v2={e,f,g,h}
5946// Requires similar checks to that of isVTRNMask with respect to how the
5947// results are returned.
5948static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5949 unsigned EltSz = VT.getScalarSizeInBits();
5950 if (EltSz == 64)
5951 return false;
5952
5953 unsigned NumElts = VT.getVectorNumElements();
5954 if (M.size() != NumElts && M.size() != NumElts*2)
5955 return false;
5956
5957 for (unsigned i = 0; i < M.size(); i += NumElts) {
5958 WhichResult = SelectPairHalf(NumElts, M, i);
5959 unsigned Idx = WhichResult * NumElts / 2;
5960 for (unsigned j = 0; j < NumElts; j += 2) {
5961 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5962 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
5963 return false;
5964 Idx += 1;
5965 }
5966 }
5967
5968 if (M.size() == NumElts*2)
5969 WhichResult = 0;
5970
5971 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5972 if (VT.is64BitVector() && EltSz == 32)
5973 return false;
5974
5975 return true;
5976}
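// Worked example (illustrative only) for v4i32 with v1 = {a,b,c,d} and
// v2 = {e,f,g,h}: [0, 4, 1, 5] (WhichResult == 0) interleaves the low halves
// into {a,e,b,f} and [2, 6, 3, 7] (WhichResult == 1) the high halves into
// {c,g,d,h}, i.e. the two results of VZIP.32.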
5977
5978/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5979/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5980/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5981static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5982 unsigned EltSz = VT.getScalarSizeInBits();
5983 if (EltSz == 64)
5984 return false;
5985
5986 unsigned NumElts = VT.getVectorNumElements();
5987 if (M.size() != NumElts && M.size() != NumElts*2)
5988 return false;
5989
5990 for (unsigned i = 0; i < M.size(); i += NumElts) {
5991 WhichResult = SelectPairHalf(NumElts, M, i);
5992 unsigned Idx = WhichResult * NumElts / 2;
5993 for (unsigned j = 0; j < NumElts; j += 2) {
5994 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5995 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
5996 return false;
5997 Idx += 1;
5998 }
5999 }
6000
6001 if (M.size() == NumElts*2)
6002 WhichResult = 0;
6003
6004 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
6005 if (VT.is64BitVector() && EltSz == 32)
6006 return false;
6007
6008 return true;
6009}
6010
6011/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
6012/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
6013static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
6014 unsigned &WhichResult,
6015 bool &isV_UNDEF) {
6016 isV_UNDEF = false;
6017 if (isVTRNMask(ShuffleMask, VT, WhichResult))
15. Calling 'isVTRNMask'
46. Returning from 'isVTRNMask'
47. Taking true branch
6018 return ARMISD::VTRN;
6019 if (isVUZPMask(ShuffleMask, VT, WhichResult))
6020 return ARMISD::VUZP;
6021 if (isVZIPMask(ShuffleMask, VT, WhichResult))
6022 return ARMISD::VZIP;
6023
6024 isV_UNDEF = true;
6025 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6026 return ARMISD::VTRN;
6027 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6028 return ARMISD::VUZP;
6029 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6030 return ARMISD::VZIP;
6031
6032 return 0;
6033}
6034
6035/// \return true if this is a reverse operation on a vector.
6036static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6037 unsigned NumElts = VT.getVectorNumElements();
6038 // Make sure the mask has the right size.
6039 if (NumElts != M.size())
6040 return false;
6041
6042 // Look for <15, ..., 3, -1, 1, 0>.
6043 for (unsigned i = 0; i != NumElts; ++i)
6044 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6045 return false;
6046
6047 return true;
6048}
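// Worked example (illustrative only): for v4i32 the mask <3, 2, 1, 0> (or
// <3, -1, 1, 0>, since UNDEF entries are ignored) reverses the whole vector
// and is accepted; <3, 2, 0, 1> is rejected.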
6049
6050// If N is an integer constant that can be moved into a register in one
6051// instruction, return an SDValue of such a constant (will become a MOV
6052// instruction). Otherwise return null.
6053static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6054 const ARMSubtarget *ST, const SDLoc &dl) {
6055 uint64_t Val;
6056 if (!isa<ConstantSDNode>(N))
6057 return SDValue();
6058 Val = cast<ConstantSDNode>(N)->getZExtValue();
6059
6060 if (ST->isThumb1Only()) {
6061 if (Val <= 255 || ~Val <= 255)
6062 return DAG.getConstant(Val, dl, MVT::i32);
6063 } else {
6064 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6065 return DAG.getConstant(Val, dl, MVT::i32);
6066 }
6067 return SDValue();
6068}
6069
6070// If this is a case we can't handle, return null and let the default
6071// expansion code take care of it.
6072SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6073 const ARMSubtarget *ST) const {
6074 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6075 SDLoc dl(Op);
6076 EVT VT = Op.getValueType();
6077
6078 APInt SplatBits, SplatUndef;
6079 unsigned SplatBitSize;
6080 bool HasAnyUndefs;
6081 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6082 if (SplatUndef.isAllOnesValue())
6083 return DAG.getUNDEF(VT);
6084
6085 if (SplatBitSize <= 64) {
6086 // Check if an immediate VMOV works.
6087 EVT VmovVT;
6088 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6089 SplatUndef.getZExtValue(), SplatBitSize,
6090 DAG, dl, VmovVT, VT.is128BitVector(),
6091 VMOVModImm);
6092 if (Val.getNode()) {
6093 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6094 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6095 }
6096
6097 // Try an immediate VMVN.
6098 uint64_t NegatedImm = (~SplatBits).getZExtValue();
6099 Val = isNEONModifiedImm(NegatedImm,
6100 SplatUndef.getZExtValue(), SplatBitSize,
6101 DAG, dl, VmovVT, VT.is128BitVector(),
6102 VMVNModImm);
6103 if (Val.getNode()) {
6104 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6105 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6106 }
6107
6108 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6109 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6110 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6111 if (ImmVal != -1) {
6112 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6113 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6114 }
6115 }
6116 }
6117 }
6118
6119 // Scan through the operands to see if only one value is used.
6120 //
6121 // As an optimisation, even if more than one value is used it may be more
6122 // profitable to splat with one value then change some lanes.
6123 //
6124 // Heuristically we decide to do this if the vector has a "dominant" value,
6125 // defined as splatted to more than half of the lanes.
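  // For example (illustrative only), <a, a, a, b> has 'a' as its dominant
  // value: it is cheaper to VDUP 'a' and then insert 'b' into lane 3 than to
  // build the vector element by element.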
6126 unsigned NumElts = VT.getVectorNumElements();
6127 bool isOnlyLowElement = true;
6128 bool usesOnlyOneValue = true;
6129 bool hasDominantValue = false;
6130 bool isConstant = true;
6131
6132 // Map of the number of times a particular SDValue appears in the
6133 // element list.
6134 DenseMap<SDValue, unsigned> ValueCounts;
6135 SDValue Value;
6136 for (unsigned i = 0; i < NumElts; ++i) {
6137 SDValue V = Op.getOperand(i);
6138 if (V.isUndef())
6139 continue;
6140 if (i > 0)
6141 isOnlyLowElement = false;
6142 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6143 isConstant = false;
6144
6145 ValueCounts.insert(std::make_pair(V, 0));
6146 unsigned &Count = ValueCounts[V];
6147
6148 // Is this value dominant? (takes up more than half of the lanes)
6149 if (++Count > (NumElts / 2)) {
6150 hasDominantValue = true;
6151 Value = V;
6152 }
6153 }
6154 if (ValueCounts.size() != 1)
6155 usesOnlyOneValue = false;
6156 if (!Value.getNode() && !ValueCounts.empty())
6157 Value = ValueCounts.begin()->first;
6158
6159 if (ValueCounts.empty())
6160 return DAG.getUNDEF(VT);
6161
6162 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6163 // Keep going if we are hitting this case.
6164 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6165 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6166
6167 unsigned EltSize = VT.getScalarSizeInBits();
6168
6169 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6170 // i32 and try again.
6171 if (hasDominantValue && EltSize <= 32) {
6172 if (!isConstant) {
6173 SDValue N;
6174
6175 // If we are VDUPing a value that comes directly from a vector, that will
6176 // cause an unnecessary move to and from a GPR, where instead we could
6177 // just use VDUPLANE. We can only do this if the lane being extracted
6178 // is at a constant index, as the VDUP from lane instructions only have
6179 // constant-index forms.
6180 ConstantSDNode *constIndex;
6181 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6182 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6183 // We need to create a new undef vector to use for the VDUPLANE if the
6184 // size of the vector from which we get the value is different than the
6185 // size of the vector that we need to create. We will insert the element
6186 // such that the register coalescer will remove unnecessary copies.
6187 if (VT != Value->getOperand(0).getValueType()) {
6188 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6189 VT.getVectorNumElements();
6190 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6191 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6192 Value, DAG.getConstant(index, dl, MVT::i32)),
6193 DAG.getConstant(index, dl, MVT::i32));
6194 } else
6195 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6196 Value->getOperand(0), Value->getOperand(1));
6197 } else
6198 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6199
6200 if (!usesOnlyOneValue) {
6201 // The dominant value was splatted as 'N', but we now have to insert
6202 // all differing elements.
6203 for (unsigned I = 0; I < NumElts; ++I) {
6204 if (Op.getOperand(I) == Value)
6205 continue;
6206 SmallVector<SDValue, 3> Ops;
6207 Ops.push_back(N);
6208 Ops.push_back(Op.getOperand(I));
6209 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6210 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6211 }
6212 }
6213 return N;
6214 }
6215 if (VT.getVectorElementType().isFloatingPoint()) {
6216 SmallVector<SDValue, 8> Ops;
6217 for (unsigned i = 0; i < NumElts; ++i)
6218 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6219 Op.getOperand(i)));
6220 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6221 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6222 Val = LowerBUILD_VECTOR(Val, DAG, ST);
6223 if (Val.getNode())
6224 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6225 }
6226 if (usesOnlyOneValue) {
6227 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6228 if (isConstant && Val.getNode())
6229 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6230 }
6231 }
6232
6233 // If all elements are constants and the case above didn't get hit, fall back
6234 // to the default expansion, which will generate a load from the constant
6235 // pool.
6236 if (isConstant)
6237 return SDValue();
6238
6239 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6240 if (NumElts >= 4) {
6241 SDValue shuffle = ReconstructShuffle(Op, DAG);
6242 if (shuffle != SDValue())
6243 return shuffle;
6244 }
6245
6246 if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6247 // If we haven't found an efficient lowering, try splitting a 128-bit vector
6248 // into two 64-bit vectors; we might discover a better way to lower it.
6249 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6250 EVT ExtVT = VT.getVectorElementType();
6251 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6252 SDValue Lower =
6253 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6254 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6255 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6256 SDValue Upper = DAG.getBuildVector(
6257 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6258 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6259 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6260 if (Lower && Upper)
6261 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6262 }
6263
6264 // Vectors with 32- or 64-bit elements can be built by directly assigning
6265 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6266 // will be legalized.
6267 if (EltSize >= 32) {
6268 // Do the expansion with floating-point types, since that is what the VFP
6269 // registers are defined to use, and since i64 is not legal.
6270 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6271 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6272 SmallVector<SDValue, 8> Ops;
6273 for (unsigned i = 0; i < NumElts; ++i)
6274 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6275 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6276 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6277 }
6278
6279 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6280 // know the default expansion would otherwise fall back on something even
6281 // worse. For a vector with one or two non-undef values, that's
6282 // scalar_to_vector for the elements followed by a shuffle (provided the
6283 // shuffle is valid for the target) and materialization element by element
6284 // on the stack followed by a load for everything else.
6285 if (!isConstant && !usesOnlyOneValue) {
6286 SDValue Vec = DAG.getUNDEF(VT);
6287 for (unsigned i = 0 ; i < NumElts; ++i) {
6288 SDValue V = Op.getOperand(i);
6289 if (V.isUndef())
6290 continue;
6291 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6292 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6293 }
6294 return Vec;
6295 }
6296
6297 return SDValue();
6298}
6299
6300// Gather data to see if the operation can be modelled as a
6301// shuffle in combination with VEXTs.
6302SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6303 SelectionDAG &DAG) const {
6304  assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6305 SDLoc dl(Op);
6306 EVT VT = Op.getValueType();
6307 unsigned NumElts = VT.getVectorNumElements();
6308
6309 struct ShuffleSourceInfo {
6310 SDValue Vec;
6311 unsigned MinElt = std::numeric_limits<unsigned>::max();
6312 unsigned MaxElt = 0;
6313
6314 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6315 // be compatible with the shuffle we intend to construct. As a result
6316 // ShuffleVec will be some sliding window into the original Vec.
6317 SDValue ShuffleVec;
6318
6319 // Code should guarantee that element i in Vec starts at element
6320 // "WindowBase + i * WindowScale" in ShuffleVec.
6321 int WindowBase = 0;
6322 int WindowScale = 1;
6323
6324 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6325
6326 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6327 };
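Note (editorial illustration, not part of the source): the WindowBase/WindowScale bookkeeping described in the struct above can be checked with a tiny standalone sketch. The v4i16/v8i8 figures below are assumed values for illustration, not taken from any particular test case.

#include <cassert>

// Maps a lane of the original source vector to a lane of ShuffleVec.
static int shuffleLane(int EltNo, int WindowScale, int WindowBase) {
  return EltNo * WindowScale + WindowBase;
}

int main() {
  // A v4i16 source bitcast to a v8i8 shuffle type: WindowScale == 16 / 8 == 2,
  // so source lane 3 begins at shuffle lane 6 when no VEXT has shifted the window.
  assert(shuffleLane(3, /*WindowScale=*/2, /*WindowBase=*/0) == 6);
  return 0;
}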
6328
6329 // First gather all vectors used as an immediate source for this BUILD_VECTOR
6330 // node.
6331 SmallVector<ShuffleSourceInfo, 2> Sources;
6332 for (unsigned i = 0; i < NumElts; ++i) {
6333 SDValue V = Op.getOperand(i);
6334 if (V.isUndef())
6335 continue;
6336 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6337 // A shuffle can only come from building a vector from various
6338 // elements of other vectors.
6339 return SDValue();
6340 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6341 // Furthermore, shuffles require a constant mask, whereas extractelts
6342 // accept variable indices.
6343 return SDValue();
6344 }
6345
6346 // Add this element source to the list if it's not already there.
6347 SDValue SourceVec = V.getOperand(0);
6348 auto Source = llvm::find(Sources, SourceVec);
6349 if (Source == Sources.end())
6350 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6351
6352 // Update the minimum and maximum lane number seen.
6353 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6354 Source->MinElt = std::min(Source->MinElt, EltNo);
6355 Source->MaxElt = std::max(Source->MaxElt, EltNo);
6356 }
6357
6358 // Currently only do something sane when at most two source vectors
6359 // are involved.
6360 if (Sources.size() > 2)
6361 return SDValue();
6362
6363 // Find the smallest element size among the result and the two sources, and
6364 // use it as the element size for building the shuffle_vector.
6365 EVT SmallestEltTy = VT.getVectorElementType();
6366 for (auto &Source : Sources) {
6367 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6368 if (SrcEltTy.bitsLT(SmallestEltTy))
6369 SmallestEltTy = SrcEltTy;
6370 }
6371 unsigned ResMultiplier =
6372 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6373 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6374 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6375
6376 // If the source vector is too wide or too narrow, we may nevertheless be able
6377 // to construct a compatible shuffle either by concatenating it with UNDEF or
6378 // extracting a suitable range of elements.
6379 for (auto &Src : Sources) {
6380 EVT SrcVT = Src.ShuffleVec.getValueType();
6381
6382 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6383 continue;
6384
6385 // This stage of the search produces a source with the same element type as
6386 // the original, but with a total width matching the BUILD_VECTOR output.
6387 EVT EltVT = SrcVT.getVectorElementType();
6388 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6389 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6390
6391 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6392 if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6393 return SDValue();
6394 // We can pad out the smaller vector for free by concatenating it with
6395 // UNDEF, so do that and keep going.
6396 Src.ShuffleVec =
6397 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6398 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6399 continue;
6400 }
6401
6402 if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6403 return SDValue();
6404
6405 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6406 // Span too large for a VEXT to cope
6407 return SDValue();
6408 }
6409
6410 if (Src.MinElt >= NumSrcElts) {
6411 // The extraction can just take the second half
6412 Src.ShuffleVec =
6413 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6414 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6415 Src.WindowBase = -NumSrcElts;
6416 } else if (Src.MaxElt < NumSrcElts) {
6417 // The extraction can just take the first half
6418 Src.ShuffleVec =
6419 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6420 DAG.getConstant(0, dl, MVT::i32));
6421 } else {
6422 // An actual VEXT is needed
6423 SDValue VEXTSrc1 =
6424 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6425 DAG.getConstant(0, dl, MVT::i32));
6426 SDValue VEXTSrc2 =
6427 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6428 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6429
6430 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6431 VEXTSrc2,
6432 DAG.getConstant(Src.MinElt, dl, MVT::i32));
6433 Src.WindowBase = -Src.MinElt;
6434 }
6435 }
6436
6437 // Another possible incompatibility occurs from the vector element types. We
6438 // can fix this by bitcasting the source vectors to the same type we intend
6439 // for the shuffle.
6440 for (auto &Src : Sources) {
6441 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6442 if (SrcEltTy == SmallestEltTy)
6443 continue;
6444 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6445 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6446 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6447 Src.WindowBase *= Src.WindowScale;
6448 }
6449
6450 // Final sanity check before we try to actually produce a shuffle.
6451 DEBUG(
6452 for (auto Src : Sources)
6453 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
6454 );
6455
6456 // The stars all align, our next step is to produce the mask for the shuffle.
6457 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6458 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6459 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6460 SDValue Entry = Op.getOperand(i);
6461 if (Entry.isUndef())
6462 continue;
6463
6464 auto Src = llvm::find(Sources, Entry.getOperand(0));
6465 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6466
6467 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6468 // trunc. So only std::min(SrcBits, DestBits) bits actually get defined in
6469 // this segment.
6470 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6471 int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6472 VT.getScalarSizeInBits());
6473 int LanesDefined = BitsDefined / BitsPerShuffleLane;
6474
6475 // This source is expected to fill ResMultiplier lanes of the final shuffle,
6476 // starting at the appropriate offset.
6477 int *LaneMask = &Mask[i * ResMultiplier];
6478
6479 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6480 ExtractBase += NumElts * (Src - Sources.begin());
6481 for (int j = 0; j < LanesDefined; ++j)
6482 LaneMask[j] = ExtractBase + j;
6483 }
6484
6485 // Final check before we try to produce nonsense...
6486 if (!isShuffleMaskLegal(Mask, ShuffleVT))
6487 return SDValue();
6488
6489 // We can't handle more than two sources. This should have already
6490 // been checked before this point.
6491 assert(Sources.size() <= 2 && "Too many sources!");
6492
6493 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6494 for (unsigned i = 0; i < Sources.size(); ++i)
6495 ShuffleOps[i] = Sources[i].ShuffleVec;
6496
6497 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6498 ShuffleOps[1], Mask);
6499 return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6500}
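Note (editorial sketch): per defined lane, the mask arithmetic above reduces to EltNo * WindowScale + WindowBase + NumElts * SourceIndex. Below is a standalone recomputation for a hypothetical v4i32 BUILD_VECTOR <S0[2], S0[3], S1[0], S1[1]> built from two v4i32 sources, so WindowScale == 1 and WindowBase == 0; the values are made up for illustration.

#include <cassert>
#include <vector>

int main() {
  const int NumElts = 4;                      // lanes in the shuffle type
  struct Elt { int SrcIdx, EltNo; };          // which source, which lane
  std::vector<Elt> Elts = {{0, 2}, {0, 3}, {1, 0}, {1, 1}};

  std::vector<int> Mask;
  for (const Elt &E : Elts)                   // WindowScale == 1, WindowBase == 0
    Mask.push_back(E.EltNo + NumElts * E.SrcIdx);

  // <2,3,4,5> is precisely the mask of a two-lane VEXT of the two sources.
  assert((Mask == std::vector<int>{2, 3, 4, 5}));
  return 0;
}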
6501
6502/// isShuffleMaskLegal - Targets can use this to indicate that they only
6503/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6504/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6505/// are assumed to be legal.
6506bool ARMTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
6507 if (VT.getVectorNumElements() == 4 &&
6508 (VT.is128BitVector() || VT.is64BitVector())) {
6509 unsigned PFIndexes[4];
6510 for (unsigned i = 0; i != 4; ++i) {
6511 if (M[i] < 0)
6512 PFIndexes[i] = 8;
6513 else
6514 PFIndexes[i] = M[i];
6515 }
6516
6517 // Compute the index in the perfect shuffle table.
6518 unsigned PFTableIndex =
6519 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6520 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6521 unsigned Cost = (PFEntry >> 30);
6522
6523 if (Cost <= 4)
6524 return true;
6525 }
6526
6527 bool ReverseVEXT, isV_UNDEF;
6528 unsigned Imm, WhichResult;
6529
6530 unsigned EltSize = VT.getScalarSizeInBits();
6531 return (EltSize >= 32 ||
6532 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6533 isVREVMask(M, VT, 64) ||
6534 isVREVMask(M, VT, 32) ||
6535 isVREVMask(M, VT, 16) ||
6536 isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6537 isVTBLMask(M, VT) ||
6538 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6539 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6540}
6541
6542/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6543/// the specified operations to build the shuffle.
6544static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6545 SDValue RHS, SelectionDAG &DAG,
6546 const SDLoc &dl) {
6547 unsigned OpNum = (PFEntry >> 26) & 0x0F;
6548 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6549 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
6550
6551 enum {
6552 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6553 OP_VREV,
6554 OP_VDUP0,
6555 OP_VDUP1,
6556 OP_VDUP2,
6557 OP_VDUP3,
6558 OP_VEXT1,
6559 OP_VEXT2,
6560 OP_VEXT3,
6561 OP_VUZPL, // VUZP, left result
6562 OP_VUZPR, // VUZP, right result
6563 OP_VZIPL, // VZIP, left result
6564 OP_VZIPR, // VZIP, right result
6565 OP_VTRNL, // VTRN, left result
6566 OP_VTRNR // VTRN, right result
6567 };
6568
6569 if (OpNum == OP_COPY) {
6570 if (LHSID == (1*9+2)*9+3) return LHS;
6571 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6572 return RHS;
6573 }
6574
6575 SDValue OpLHS, OpRHS;
6576 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6577 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6578 EVT VT = OpLHS.getValueType();
6579
6580 switch (OpNum) {
6581 default: llvm_unreachable("Unknown shuffle opcode!");
6582 case OP_VREV:
6583 // VREV divides the vector in half and swaps within the half.
6584 if (VT.getVectorElementType() == MVT::i32 ||
6585 VT.getVectorElementType() == MVT::f32)
6586 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6587 // vrev <4 x i16> -> VREV32
6588 if (VT.getVectorElementType() == MVT::i16)
6589 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6590 // vrev <4 x i8> -> VREV16
6591 assert(VT.getVectorElementType() == MVT::i8);
6592 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6593 case OP_VDUP0:
6594 case OP_VDUP1:
6595 case OP_VDUP2:
6596 case OP_VDUP3:
6597 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6598 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6599 case OP_VEXT1:
6600 case OP_VEXT2:
6601 case OP_VEXT3:
6602 return DAG.getNode(ARMISD::VEXT, dl, VT,
6603 OpLHS, OpRHS,
6604 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6605 case OP_VUZPL:
6606 case OP_VUZPR:
6607 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6608 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6609 case OP_VZIPL:
6610 case OP_VZIPR:
6611 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6612 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6613 case OP_VTRNL:
6614 case OP_VTRNR:
6615 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6616 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6617 }
6618}
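Note (editorial sketch): the perfect-shuffle machinery above indexes the table with a four-digit base-9 number (digit 8 meaning an undef lane) and packs a cost, an opcode and two operand IDs into each 32-bit entry. The PFEntry value below is fabricated for illustration; only the bit layout mirrors the decoding in GeneratePerfectShuffle.

#include <cassert>

int main() {
  // The identity mask <0,1,2,3> as a base-9 index; 102 is the OP_COPY LHSID
  // tested above as (1*9+2)*9+3.
  unsigned PFIndexes[4] = {0, 1, 2, 3};
  unsigned PFTableIndex =
      PFIndexes[0]*9*9*9 + PFIndexes[1]*9*9 + PFIndexes[2]*9 + PFIndexes[3];
  assert(PFTableIndex == 102);

  // Decode a made-up entry the same way GeneratePerfectShuffle does.
  unsigned PFEntry = (1u << 30) | (102u << 13) | 3382u;
  unsigned Cost  = PFEntry >> 30;
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
  unsigned RHSID = PFEntry & ((1 << 13) - 1);
  assert(Cost == 1 && OpNum == 0 && LHSID == 102 && RHSID == 3382);
  return 0;
}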
6619
6620static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6621 ArrayRef<int> ShuffleMask,
6622 SelectionDAG &DAG) {
6623 // Check to see if we can use the VTBL instruction.
6624 SDValue V1 = Op.getOperand(0);
6625 SDValue V2 = Op.getOperand(1);
6626 SDLoc DL(Op);
6627
6628 SmallVector<SDValue, 8> VTBLMask;
6629 for (ArrayRef<int>::iterator
6630 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6631 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6632
6633 if (V2.getNode()->isUndef())
6634 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6635 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6636
6637 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6638 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6639}
6640
6641static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6642 SelectionDAG &DAG) {
6643 SDLoc DL(Op);
6644 SDValue OpLHS = Op.getOperand(0);
6645 EVT VT = OpLHS.getValueType();
6646
6647 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6648 "Expect an v8i16/v16i8 type");
6649 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6650 // For a v16i8 type: after the VREV64 we have <7, ..., 0, 15, ..., 8> in
6651 // terms of the original lane indices. Now extract the first 8 bytes into the
6652 // top double word and the last 8 bytes into the bottom double word. The
6652 // v8i16 case is similar.
6653 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6654 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6655 DAG.getConstant(ExtractNum, DL, MVT::i32));
6656}
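Note (editorial sketch): a plain array model of the VREV64 + VEXT #8 sequence above, checking that it fully reverses a v16i8. This only simulates lane movement, not the actual DAG nodes.

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  std::array<uint8_t, 16> In;
  for (int i = 0; i < 16; ++i) In[i] = i;

  // VREV64.8: reverse the byte order within each 64-bit half.
  std::array<uint8_t, 16> Rev;
  for (int i = 0; i < 16; ++i) Rev[i] = In[(i & ~7) + (7 - (i & 7))];

  // VEXT.8 #8 with both operands equal to Rev: take a 16-byte window starting
  // at byte 8 of Rev ++ Rev, i.e. rotate the two halves.
  std::array<uint8_t, 16> Out;
  for (int i = 0; i < 16; ++i) Out[i] = Rev[(i + 8) % 16];

  for (int i = 0; i < 16; ++i) assert(Out[i] == 15 - i);  // fully reversed
  return 0;
}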
6657
6658static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6659 SDValue V1 = Op.getOperand(0);
6660 SDValue V2 = Op.getOperand(1);
6661 SDLoc dl(Op);
6662 EVT VT = Op.getValueType();
6663 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6664
6665 // Convert shuffles that are directly supported on NEON to target-specific
6666 // DAG nodes, instead of keeping them as shuffles and matching them again
6667 // during code selection. This is more efficient and avoids the possibility
6668 // of inconsistencies between legalization and selection.
6669 // FIXME: floating-point vectors should be canonicalized to integer vectors
6670 // of the same size so that they get CSEd properly.
6671 ArrayRef<int> ShuffleMask = SVN->getMask();
6672
6673 unsigned EltSize = VT.getScalarSizeInBits();
6674 if (EltSize <= 32) {
1. Assuming 'EltSize' is <= 32
2. Taking true branch
6675 if (SVN->isSplat()) {
3. Assuming the condition is false
4. Taking false branch
6676 int Lane = SVN->getSplatIndex();
6677 // If this is undef splat, generate it via "just" vdup, if possible.
6678 if (Lane == -1) Lane = 0;
6679
6680 // Test if V1 is a SCALAR_TO_VECTOR.
6681 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6682 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6683 }
6684 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6685 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6686 // reaches it).
6687 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6688 !isa<ConstantSDNode>(V1.getOperand(0))) {
6689 bool IsScalarToVector = true;
6690 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6691 if (!V1.getOperand(i).isUndef()) {
6692 IsScalarToVector = false;
6693 break;
6694 }
6695 if (IsScalarToVector)
6696 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6697 }
6698 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6699 DAG.getConstant(Lane, dl, MVT::i32));
6700 }
6701
6702 bool ReverseVEXT;
6703 unsigned Imm;
6704 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
5. Assuming the condition is false
6. Taking false branch
6705 if (ReverseVEXT)
6706 std::swap(V1, V2);
6707 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6708 DAG.getConstant(Imm, dl, MVT::i32));
6709 }
6710
6711 if (isVREVMask(ShuffleMask, VT, 64))
7. Assuming the condition is false
8. Taking false branch
6712 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6713 if (isVREVMask(ShuffleMask, VT, 32))
9. Assuming the condition is false
10. Taking false branch
6714 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6715 if (isVREVMask(ShuffleMask, VT, 16))
11. Assuming the condition is false
12. Taking false branch
6716 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6717
6718 if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6719 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6720 DAG.getConstant(Imm, dl, MVT::i32));
6721 }
6722
6723 // Check for Neon shuffles that modify both input vectors in place.
6724 // If both results are used, i.e., if there are two shuffles with the same
6725 // source operands and with masks corresponding to both results of one of
6726 // these operations, DAG memoization will ensure that a single node is
6727 // used for both shuffles.
6728 unsigned WhichResult;
13. 'WhichResult' declared without an initial value
6729 bool isV_UNDEF;
6730 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
14. Calling 'isNEONTwoResultShuffleMask'
48. Returning from 'isNEONTwoResultShuffleMask'
49. Taking true branch
6731 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
6732 if (isV_UNDEF)
50. Taking false branch
6733 V2 = V1;
6734 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
51. 1st function call argument is an uninitialized value
6735 .getValue(WhichResult);
6736 }
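Note (editorial): the warning above fires because WhichResult and isV_UNDEF are written only inside isNEONTwoResultShuffleMask, and the analyzer does not see an assignment on the path it explored. Whether that path is actually reachable depends on isNEONTwoResultShuffleMask setting WhichResult whenever it returns a nonzero opcode. A defensive sketch that would satisfy the analyzer, reusing the names from the listing above (this is an illustration, not the actual upstream fix):

unsigned WhichResult = 0;   // defined even if the helper leaves it untouched
bool isV_UNDEF = false;
if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
        ShuffleMask, VT, WhichResult, isV_UNDEF)) {
  if (isV_UNDEF)
    V2 = V1;
  return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
      .getValue(WhichResult);
}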
6737
6738 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
6739 // shuffles that produce a result larger than their operands with:
6740 // shuffle(concat(v1, undef), concat(v2, undef))
6741 // ->
6742 // shuffle(concat(v1, v2), undef)
6743 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
6744 //
6745 // This is useful in the general case, but there are special cases where
6746 // native shuffles produce larger results: the two-result ops.
6747 //
6748 // Look through the concat when lowering them:
6749 // shuffle(concat(v1, v2), undef)
6750 // ->
6751 // concat(VZIP(v1, v2):0, :1)
6752 //
6753 if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
6754 SDValue SubV1 = V1->getOperand(0);
6755 SDValue SubV2 = V1->getOperand(1);
6756 EVT SubVT = SubV1.getValueType();
6757
6758 // We expect these to have been canonicalized to -1.
6759 assert(llvm::all_of(ShuffleMask, [&](int i) {
6760 return i < (int)VT.getVectorNumElements();
6761 }) && "Unexpected shuffle index into UNDEF operand!");
6762
6763 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6764 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
6765 if (isV_UNDEF)
6766 SubV2 = SubV1;
6767 assert((WhichResult == 0) &&
6768 "In-place shuffle of concat can only have one result!");
6769 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
6770 SubV1, SubV2);
6771 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
6772 Res.getValue(1));
6773 }
6774 }
6775 }
6776
6777 // If the shuffle is not directly supported and it has 4 elements, use
6778 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6779 unsigned NumElts = VT.getVectorNumElements();
6780 if (NumElts == 4) {
6781 unsigned PFIndexes[4];
6782 for (unsigned i = 0; i != 4; ++i) {
6783 if (ShuffleMask[i] < 0)
6784 PFIndexes[i] = 8;
6785 else
6786 PFIndexes[i] = ShuffleMask[i];
6787 }
6788
6789 // Compute the index in the perfect shuffle table.
6790 unsigned PFTableIndex =
6791 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6792 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6793 unsigned Cost = (PFEntry >> 30);
6794
6795 if (Cost <= 4)
6796 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6797 }
6798
6799 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
6800 if (EltSize >= 32) {
6801 // Do the expansion with floating-point types, since that is what the VFP
6802 // registers are defined to use, and since i64 is not legal.
6803 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6804 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6805 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
6806 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
6807 SmallVector<SDValue, 8> Ops;
6808 for (unsigned i = 0; i < NumElts; ++i) {
6809 if (ShuffleMask[i] < 0)
6810 Ops.push_back(DAG.getUNDEF(EltVT));
6811 else
6812 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
6813 ShuffleMask[i] < (int)NumElts ? V1 : V2,
6814 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
6815 dl, MVT::i32)));
6816 }
6817 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6818 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6819 }
6820
6821 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
6822 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
6823
6824 if (VT == MVT::v8i8)
6825 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
6826 return NewOp;
6827
6828 return SDValue();
6829}
6830
6831static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6832 // INSERT_VECTOR_ELT is legal only for immediate indexes.
6833 SDValue Lane = Op.getOperand(2);
6834 if (!isa<ConstantSDNode>(Lane))
6835 return SDValue();
6836
6837 return Op;
6838}
6839
6840static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6841 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
6842 SDValue Lane = Op.getOperand(1);
6843 if (!isa<ConstantSDNode>(Lane))
6844 return SDValue();
6845
6846 SDValue Vec = Op.getOperand(0);
6847 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
6848 SDLoc dl(Op);
6849 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
6850 }
6851
6852 return Op;
6853}
6854
6855static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
6856 // The only time a CONCAT_VECTORS operation can have legal types is when
6857 // two 64-bit vectors are concatenated to a 128-bit vector.
6858 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
6859 "unexpected CONCAT_VECTORS");
6860 SDLoc dl(Op);
6861 SDValue Val = DAG.getUNDEF(MVT::v2f64);
6862 SDValue Op0 = Op.getOperand(0);
6863 SDValue Op1 = Op.getOperand(1);
6864 if (!Op0.isUndef())
6865 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6866 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
6867 DAG.getIntPtrConstant(0, dl));
6868 if (!Op1.isUndef())
6869 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6870 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
6871 DAG.getIntPtrConstant(1, dl));
6872 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
6873}
6874
6875/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
6876/// element has been zero/sign-extended, depending on the isSigned parameter,
6877/// from an integer type half its size.
6878static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
6879 bool isSigned) {
6880 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
6881 EVT VT = N->getValueType(0);
6882 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
6883 SDNode *BVN = N->getOperand(0).getNode();
6884 if (BVN->getValueType(0) != MVT::v4i32 ||
6885 BVN->getOpcode() != ISD::BUILD_VECTOR)
6886 return false;
6887 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6888 unsigned HiElt = 1 - LoElt;
6889 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
6890 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
6891 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
6892 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
6893 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
6894 return false;
6895 if (isSigned) {
6896 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
6897 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
6898 return true;
6899 } else {
6900 if (Hi0->isNullValue() && Hi1->isNullValue())
6901 return true;
6902 }
6903 return false;
6904 }
6905
6906 if (N->getOpcode() != ISD::BUILD_VECTOR)
6907 return false;
6908
6909 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
6910 SDNode *Elt = N->getOperand(i).getNode();
6911 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
6912 unsigned EltSize = VT.getScalarSizeInBits();
6913 unsigned HalfSize = EltSize / 2;
6914 if (isSigned) {
6915 if (!isIntN(HalfSize, C->getSExtValue()))
6916 return false;
6917 } else {
6918 if (!isUIntN(HalfSize, C->getZExtValue()))
6919 return false;
6920 }
6921 continue;
6922 }
6923 return false;
6924 }
6925
6926 return true;
6927}
6928
6929/// isSignExtended - Check if a node is a vector value that is sign-extended
6930/// or a constant BUILD_VECTOR with sign-extended elements.
6931static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
6932 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
6933 return true;
6934 if (isExtendedBUILD_VECTOR(N, DAG, true))
6935 return true;
6936 return false;
6937}
6938
6939/// isZeroExtended - Check if a node is a vector value that is zero-extended
6940/// or a constant BUILD_VECTOR with zero-extended elements.
6941static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
6942 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
6943 return true;
6944 if (isExtendedBUILD_VECTOR(N, DAG, false))
6945 return true;
6946 return false;
6947}
6948
6949static EVT getExtensionTo64Bits(const EVT &OrigVT) {
6950 if (OrigVT.getSizeInBits() >= 64)
6951 return OrigVT;
6952
6953 assert(OrigVT.isSimple() && "Expecting a simple value type");
6954
6955 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
6956 switch (OrigSimpleTy) {
6957 default: llvm_unreachable("Unexpected Vector Type");
6958 case MVT::v2i8:
6959 case MVT::v2i16:
6960 return MVT::v2i32;
6961 case MVT::v4i8:
6962 return MVT::v4i16;
6963 }
6964}
6965
6966/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
6967/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
6968/// We insert the required extension here to get the vector to fill a D register.
6969static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
6970 const EVT &OrigTy,
6971 const EVT &ExtTy,
6972 unsigned ExtOpcode) {
6973 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
6974 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
6975 // 64-bits we need to insert a new extension so that it will be 64-bits.
6976 assert(ExtTy.is128BitVector() && "Unexpected extension size");
6977 if (OrigTy.getSizeInBits() >= 64)
6978 return N;
6979
6980 // Must extend size to at least 64 bits to be used as an operand for VMULL.
6981 EVT NewVT = getExtensionTo64Bits(OrigTy);
6982
6983 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
6984}
6985
6986/// SkipLoadExtensionForVMULL - return a load of the original vector size that
6987/// does not do any sign/zero extension. If the original vector is less
6988/// than 64 bits, an appropriate extension will be added after the load to
6989/// reach a total size of 64 bits. We have to add the extension separately
6990/// because ARM does not have a sign/zero extending load for vectors.
6991static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
6992 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
6993
6994 // The load already has the right type.
6995 if (ExtendedTy == LD->getMemoryVT())
6996 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
6997 LD->getBasePtr(), LD->getPointerInfo(),
6998 LD->getAlignment(), LD->getMemOperand()->getFlags());
6999
7000 // We need to create a zextload/sextload. We cannot just create a load
7001 // followed by a sext/zext node because LowerMUL is also run during normal
7002 // operation legalization where we can't create illegal types.
7003 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
7004 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
7005 LD->getMemoryVT(), LD->getAlignment(),
7006 LD->getMemOperand()->getFlags());
7007}
7008
7009/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
7010/// extending load, or BUILD_VECTOR with extended elements, return the
7011/// unextended value. The unextended vector should be 64 bits so that it can
7012/// be used as an operand to a VMULL instruction. If the original vector size
7013 /// before extension is less than 64 bits, we add an extension to resize
7014/// the vector to 64 bits.
7015static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
7016 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
7017 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
7018 N->getOperand(0)->getValueType(0),
7019 N->getValueType(0),
7020 N->getOpcode());
7021
7022 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7023 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7024 "Expected extending load");
7025
7026 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7027 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7028 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7029 SDValue extLoad =
7030 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7031 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7032
7033 return newLoad;
7034 }
7035
7036 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7037 // have been legalized as a BITCAST from v4i32.
7038 if (N->getOpcode() == ISD::BITCAST) {
7039 SDNode *BVN = N->getOperand(0).getNode();
7040 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7041 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7042 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7043 return DAG.getBuildVector(
7044 MVT::v2i32, SDLoc(N),
7045 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7046 }
7047 // Construct a new BUILD_VECTOR with elements truncated to half the size.
7048 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7049 EVT VT = N->getValueType(0);
7050 unsigned EltSize = VT.getScalarSizeInBits() / 2;
7051 unsigned NumElts = VT.getVectorNumElements();
7052 MVT TruncVT = MVT::getIntegerVT(EltSize);
7053 SmallVector<SDValue, 8> Ops;
7054 SDLoc dl(N);
7055 for (unsigned i = 0; i != NumElts; ++i) {
7056 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7057 const APInt &CInt = C->getAPIntValue();
7058 // Element types smaller than 32 bits are not legal, so use i32 elements.
7059 // The values are implicitly truncated so sext vs. zext doesn't matter.
7060 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7061 }
7062 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7063}
7064
7065static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7066 unsigned Opcode = N->getOpcode();
7067 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7068 SDNode *N0 = N->getOperand(0).getNode();
7069 SDNode *N1 = N->getOperand(1).getNode();
7070 return N0->hasOneUse() && N1->hasOneUse() &&
7071 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7072 }
7073 return false;
7074}
7075
7076static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7077 unsigned Opcode = N->getOpcode();
7078 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7079 SDNode *N0 = N->getOperand(0).getNode();
7080 SDNode *N1 = N->getOperand(1).getNode();
7081 return N0->hasOneUse() && N1->hasOneUse() &&
7082 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7083 }
7084 return false;
7085}
7086
7087static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7088 // Multiplications are only custom-lowered for 128-bit vectors so that
7089 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
7090 EVT VT = Op.getValueType();
7091 assert(VT.is128BitVector() && VT.isInteger() &&
7092 "unexpected type for custom-lowering ISD::MUL");
7093 SDNode *N0 = Op.getOperand(0).getNode();
7094 SDNode *N1 = Op.getOperand(1).getNode();
7095 unsigned NewOpc = 0;
7096 bool isMLA = false;
7097 bool isN0SExt = isSignExtended(N0, DAG);
7098 bool isN1SExt = isSignExtended(N1, DAG);
7099 if (isN0SExt && isN1SExt)
7100 NewOpc = ARMISD::VMULLs;
7101 else {
7102 bool isN0ZExt = isZeroExtended(N0, DAG);
7103 bool isN1ZExt = isZeroExtended(N1, DAG);
7104 if (isN0ZExt && isN1ZExt)
7105 NewOpc = ARMISD::VMULLu;
7106 else if (isN1SExt || isN1ZExt) {
7107 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7108 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7109 if (isN1SExt && isAddSubSExt(N0, DAG)) {
7110 NewOpc = ARMISD::VMULLs;
7111 isMLA = true;
7112 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7113 NewOpc = ARMISD::VMULLu;
7114 isMLA = true;
7115 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7116 std::swap(N0, N1);
7117 NewOpc = ARMISD::VMULLu;
7118 isMLA = true;
7119 }
7120 }
7121
7122 if (!NewOpc) {
7123 if (VT == MVT::v2i64)
7124 // Fall through to expand this. It is not legal.
7125 return SDValue();
7126 else
7127 // Other vector multiplications are legal.
7128 return Op;
7129 }
7130 }
7131
7132 // Legalize to a VMULL instruction.
7133 SDLoc DL(Op);
7134 SDValue Op0;
7135 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7136 if (!isMLA) {
7137 Op0 = SkipExtensionForVMULL(N0, DAG);
7138 assert(Op0.getValueType().is64BitVector() &&
7139 Op1.getValueType().is64BitVector() &&
7140 "unexpected types for extended operands to VMULL");
7141 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7142 }
7143
7144 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
7145 // isel lowering to take advantage of no-stall back to back vmul + vmla.
7146 // vmull q0, d4, d6
7147 // vmlal q0, d5, d6
7148 // is faster than
7149 // vaddl q0, d4, d5
7150 // vmovl q1, d6
7151 // vmul q0, q0, q1
7152 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7153 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7154 EVT Op1VT = Op1.getValueType();
7155 return DAG.getNode(N0->getOpcode(), DL, VT,
7156 DAG.getNode(NewOpc, DL, VT,
7157 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7158 DAG.getNode(NewOpc, DL, VT,
7159 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7160}
7161
7162static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7163 SelectionDAG &DAG) {
7164 // TODO: Should this propagate fast-math-flags?
7165
7166 // Convert to float
7167 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7168 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7169 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7170 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7171 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7172 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7173 // Get reciprocal estimate.
7174 // float4 recip = vrecpeq_f32(yf);
7175 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7176 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7177 Y);
7178 // Because char has a smaller range than uchar, we can actually get away
7179 // without any newton steps. This requires that we use a weird bias
7180 // of 0xb000, however (again, this has been exhaustively tested).
7181 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7182 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7183 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7184 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7185 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7186 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7187 // Convert back to short.
7188 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7189 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7190 return X;
7191}
7192
7193static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7194 SelectionDAG &DAG) {
7195 // TODO: Should this propagate fast-math-flags?
7196
7197 SDValue N2;
7198 // Convert to float.
7199 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7200 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7201 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7202 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7203 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7204 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7205
7206 // Use reciprocal estimate and one refinement step.
7207 // float4 recip = vrecpeq_f32(yf);
7208 // recip *= vrecpsq_f32(yf, recip);
7209 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7210 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7211 N1);
7212 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7213 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7214 N1, N2);
7215 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7216 // Because short has a smaller range than ushort, we can actually get away
7217 // with only a single newton step. This requires that we use a weird bias
7218 // of 89, however (again, this has been exhaustively tested).
7219 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7220 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7221 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7222 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7223 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7224 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7225 // Convert back to integer and return.
7226 // return vmovn_s32(vcvt_s32_f32(result));
7227 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7228 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7229 return N0;
7230}
7231
7232static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7233 EVT VT = Op.getValueType();
7234 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7235 "unexpected type for custom-lowering ISD::SDIV");
7236
7237 SDLoc dl(Op);
7238 SDValue N0 = Op.getOperand(0);
7239 SDValue N1 = Op.getOperand(1);
7240 SDValue N2, N3;
7241
7242 if (VT == MVT::v8i8) {
7243 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7244 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7245
7246 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7247 DAG.getIntPtrConstant(4, dl));
7248 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7249 DAG.getIntPtrConstant(4, dl));
7250 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7251 DAG.getIntPtrConstant(0, dl));
7252 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7253 DAG.getIntPtrConstant(0, dl));
7254
7255 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7256 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7257
7258 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7259 N0 = LowerCONCAT_VECTORS(N0, DAG);
7260
7261 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7262 return N0;
7263 }
7264 return LowerSDIV_v4i16(N0, N1, dl, DAG);
7265}
7266
7267static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7268 // TODO: Should this propagate fast-math-flags?
7269 EVT VT = Op.getValueType();
7270 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7271 "unexpected type for custom-lowering ISD::UDIV");
7272
7273 SDLoc dl(Op);
7274 SDValue N0 = Op.getOperand(0);
7275 SDValue N1 = Op.getOperand(1);
7276 SDValue N2, N3;
7277
7278 if (VT == MVT::v8i8) {
7279 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7280 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7281
7282 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7283 DAG.getIntPtrConstant(4, dl));
7284 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7285 DAG.getIntPtrConstant(4, dl));
7286 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7287 DAG.getIntPtrConstant(0, dl));
7288 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7289 DAG.getIntPtrConstant(0, dl));
7290
7291 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7292 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7293
7294 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7295 N0 = LowerCONCAT_VECTORS(N0, DAG);
7296
7297 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7298 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7299 MVT::i32),
7300 N0);
7301 return N0;
7302 }
7303
7304 // v4i16 udiv ... Convert to float.
7305 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7306 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7307 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7308 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7309 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7310 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7311
7312 // Use reciprocal estimate and two refinement steps.
7313 // float4 recip = vrecpeq_f32(yf);
7314 // recip *= vrecpsq_f32(yf, recip);
7315 // recip *= vrecpsq_f32(yf, recip);
7316 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7317 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7318 BN1);
7319 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7320 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7321 BN1, N2);
7322 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7323 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7324 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7325 BN1, N2);
7326 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7327 // Simply multiplying by the reciprocal estimate can leave us a few ulps
7328 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7329 // and that it will never cause us to return an answer too large).
7330 // float4 result = as_float4(as_int4(xf*recip) + 2);
7331 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7332 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7333 N1 = DAG.getConstant(2, dl, MVT::v4i32);
7334 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7335 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7336 // Convert back to integer and return.
7337 // return vmovn_u32(vcvt_s32_f32(result));
7338 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7339 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7340 return N0;
7341}
7342
7343static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
7344 EVT VT = Op.getNode()->getValueType(0);
7345 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7346
7347 unsigned Opc;
7348 bool ExtraOp = false;
7349 switch (Op.getOpcode()) {
7350 default: llvm_unreachable("Invalid code");
7351 case ISD::ADDC: Opc = ARMISD::ADDC; break;
7352 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
7353 case ISD::SUBC: Opc = ARMISD::SUBC; break;
7354 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
7355 }
7356
7357 if (!ExtraOp)
7358 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7359 Op.getOperand(1));
7360 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7361 Op.getOperand(1), Op.getOperand(2));
7362}
7363
7364SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7365 assert(Subtarget->isTargetDarwin());
7366
7367 // For iOS, we want to call an alternative entry point: __sincos_stret,
7368 // whose return values are passed via sret.
7369 SDLoc dl(Op);
7370 SDValue Arg = Op.getOperand(0);
7371 EVT ArgVT = Arg.getValueType();
7372 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7373 auto PtrVT = getPointerTy(DAG.getDataLayout());
7374
7375 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7376 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7377
7378 // Pair of floats / doubles used to pass the result.
7379 Type *RetTy = StructType::get(ArgTy, ArgTy);
7380 auto &DL = DAG.getDataLayout();
7381
7382 ArgListTy Args;
7383 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7384 SDValue SRet;
7385 if (ShouldUseSRet) {
7386 // Create stack object for sret.
7387 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7388 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7389 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7390 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7391
7392 ArgListEntry Entry;
7393 Entry.Node = SRet;
7394 Entry.Ty = RetTy->getPointerTo();
7395 Entry.IsSExt = false;
7396 Entry.IsZExt = false;
7397 Entry.IsSRet = true;
7398 Args.push_back(Entry);
7399 RetTy = Type::getVoidTy(*DAG.getContext());
7400 }
7401
7402 ArgListEntry Entry;
7403 Entry.Node = Arg;
7404 Entry.Ty = ArgTy;
7405 Entry.IsSExt = false;
7406 Entry.IsZExt = false;
7407 Args.push_back(Entry);
7408
7409 const char *LibcallName =
7410 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
7411 RTLIB::Libcall LC =
7412 (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
7413 CallingConv::ID CC = getLibcallCallingConv(LC);
7414 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7415
7416 TargetLowering::CallLoweringInfo CLI(DAG);
7417 CLI.setDebugLoc(dl)
7418 .setChain(DAG.getEntryNode())
7419 .setCallee(CC, RetTy, Callee, std::move(Args))
7420 .setDiscardResult(ShouldUseSRet);
7421 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7422
7423 if (!ShouldUseSRet)
7424 return CallResult.first;
7425
7426 SDValue LoadSin =
7427 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7428
7429 // Address of cos field.
7430 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7431 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7432 SDValue LoadCos =
7433 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7434
7435 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7436 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7437 LoadSin.getValue(0), LoadCos.getValue(0));
7438}
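// The effective C-level shape of the call above, as a hedged sketch: a single
// call yields both results. Under APCS the pair is written through the hidden
// sret pointer and reloaded; otherwise it comes back in registers and
// CallResult.first is returned directly.
//   struct sincos_f { float sinval; float cosval; };
//   struct sincos_f __sincosf_stret(float x);   // f64 variant: __sincos_stret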
7439
7440SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7441 bool Signed,
7442 SDValue &Chain) const {
7443 EVT VT = Op.getValueType();
7444 assert((VT == MVT::i32 || VT == MVT::i64) &&
7445 "unexpected type for custom lowering DIV");
7446 SDLoc dl(Op);
7447
7448 const auto &DL = DAG.getDataLayout();
7449 const auto &TLI = DAG.getTargetLoweringInfo();
7450
7451 const char *Name = nullptr;
7452 if (Signed)
7453 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7454 else
7455 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7456
7457 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7458
7459 ARMTargetLowering::ArgListTy Args;
7460
7461 for (auto AI : {1, 0}) {
7462 ArgListEntry Arg;
7463 Arg.Node = Op.getOperand(AI);
7464 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7465 Args.push_back(Arg);
7466 }
7467
7468 CallLoweringInfo CLI(DAG);
7469 CLI.setDebugLoc(dl)
7470 .setChain(Chain)
7471 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7472 ES, std::move(Args));
7473
7474 return LowerCallTo(CLI).first;
7475}
7476
7477SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7478 bool Signed) const {
7479 assert(Op.getValueType() == MVT::i32 &&
7480 "unexpected type for custom lowering DIV");
7481 SDLoc dl(Op);
7482
7483 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7484 DAG.getEntryNode(), Op.getOperand(1));
7485
7486 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7487}
7488
7489static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7490 SDLoc DL(N);
7491 SDValue Op = N->getOperand(1);
7492 if (N->getValueType(0) == MVT::i32)
7493 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7494 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7495 DAG.getConstant(0, DL, MVT::i32));
7496 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7497 DAG.getConstant(1, DL, MVT::i32));
7498 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7499 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7500}
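// For the i64 case the check relies on "(Lo | Hi) == 0" being equivalent to
// the full 64-bit value being zero, so a single 32-bit WIN__DBZCHK on the OR
// of the two halves guards the whole denominator before the libcall is built.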
7501
7502void ARMTargetLowering::ExpandDIV_Windows(
7503 SDValue Op, SelectionDAG &DAG, bool Signed,
7504 SmallVectorImpl<SDValue> &Results) const {
7505 const auto &DL = DAG.getDataLayout();
7506 const auto &TLI = DAG.getTargetLoweringInfo();
7507
7508 assert(Op.getValueType() == MVT::i64 &&
7509 "unexpected type for custom lowering DIV");
7510 SDLoc dl(Op);
7511
7512 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7513
7514 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7515
7516 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7517 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7518 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7519 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7520
7521 Results.push_back(Lower);
7522 Results.push_back(Upper);
7523}
7524
7525static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7526 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7527 // Acquire/Release load/store is not legal for targets without a dmb or
7528 // equivalent available.
7529 return SDValue();
7530
7531 // Monotonic load/store is legal for all targets.
7532 return Op;
7533}
7534
7535static void ReplaceREADCYCLECOUNTER(SDNode *N,
7536 SmallVectorImpl<SDValue> &Results,
7537 SelectionDAG &DAG,
7538 const ARMSubtarget *Subtarget) {
7539 SDLoc DL(N);
7540 // Under Power Management extensions, the cycle-count is:
7541 // mrc p15, #0, <Rt>, c9, c13, #0
7542 SDValue Ops[] = { N->getOperand(0), // Chain
7543 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7544 DAG.getConstant(15, DL, MVT::i32),
7545 DAG.getConstant(0, DL, MVT::i32),
7546 DAG.getConstant(9, DL, MVT::i32),
7547 DAG.getConstant(13, DL, MVT::i32),
7548 DAG.getConstant(0, DL, MVT::i32)
7549 };
7550
7551 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7552 DAG.getVTList(MVT::i32, MVT::Other), Ops);
7553 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7554 DAG.getConstant(0, DL, MVT::i32)));
7555 Results.push_back(Cycles32.getValue(1));
7556}
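// The constants above encode the coprocessor read
//   mrc p15, #0, <Rt>, c9, c13, #0
// i.e. PMCCNTR, the 32-bit cycle counter. Only 32 bits are available from
// this read, so the i64 value expected by READCYCLECOUNTER is formed as
// BUILD_PAIR(Cycles32, 0) with a zero high word.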
7557
7558static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7559 SDLoc dl(V.getNode());
7560 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7561 SDValue VHi = DAG.getAnyExtOrTrunc(
7562 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7563 dl, MVT::i32);
7564 bool isBigEndian = DAG.getDataLayout().isBigEndian();
7565 if (isBigEndian)
7566 std::swap (VLo, VHi);
7567 SDValue RegClass =
7568 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7569 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7570 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7571 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7572 return SDValue(
7573 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7574}
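// A sketch of what the REG_SEQUENCE above produces, assuming little-endian:
//   GPRPair = { gsub_0 <- V[31:0], gsub_1 <- V[63:32] }
// On big-endian targets the halves are swapped first so gsub_0 holds the high
// word; the pair feeds the CMP_SWAP_64 pseudo created below.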
7575
7576static void ReplaceCMP_SWAP_64Results(SDNode *N,
7577 SmallVectorImpl<SDValue> & Results,
7578 SelectionDAG &DAG) {
7579 assert(N->getValueType(0) == MVT::i64 &&
7580 "AtomicCmpSwap on types less than 64 should be legal");
7581 SDValue Ops[] = {N->getOperand(1),
7582 createGPRPairNode(DAG, N->getOperand(2)),
7583 createGPRPairNode(DAG, N->getOperand(3)),
7584 N->getOperand(0)};
7585 SDNode *CmpSwap = DAG.getMachineNode(
7586 ARM::CMP_SWAP_64, SDLoc(N),
7587 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7588
7589 MachineFunction &MF = DAG.getMachineFunction();
7590 MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7591 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7592 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7593
7594 bool isBigEndian = DAG.getDataLayout().isBigEndian();
7595
7596 Results.push_back(
7597 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_1 : ARM::gsub_0,
7598 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7599 Results.push_back(
7600 DAG.getTargetExtractSubreg(isBigEndian ? ARM::gsub_0 : ARM::gsub_1,
7601 SDLoc(N), MVT::i32, SDValue(CmpSwap, 0)));
7602 Results.push_back(SDValue(CmpSwap, 2));
7603}
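// CMP_SWAP_64 is a pseudo that is expanded later into an exclusive
// load/store (LDREXD/STREXD) retry loop; here its untyped GPRPair result is
// split back into endian-ordered lo/hi i32 values plus the output chain.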
7604
7605static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7606 SelectionDAG &DAG) {
7607 const auto &TLI = DAG.getTargetLoweringInfo();
7608
7609 assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7610 "Custom lowering is MSVCRT specific!");
7611
7612 SDLoc dl(Op);
7613 SDValue Val = Op.getOperand(0);
7614 MVT Ty = Val->getSimpleValueType(0);
7615 SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7616 SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7617 TLI.getPointerTy(DAG.getDataLayout()));
7618
7619 TargetLowering::ArgListTy Args;
7620 TargetLowering::ArgListEntry Entry;
7621
7622 Entry.Node = Val;
7623 Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7624 Entry.IsZExt = true;
7625 Args.push_back(Entry);
7626
7627 Entry.Node = Exponent;
7628 Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7629 Entry.IsZExt = true;
7630 Args.push