Bug Summary

File: lib/Target/ARM/ARMISelLowering.cpp
Warning: line 883, column 13
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn301135/lib/Target/ARM/ARMISelLowering.cpp:883:13)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, CC, consider reordering the fields or adding explicit padding members
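
For context, here is a minimal sketch of the layout issue, not taken from the report: it assumes a typical 64-bit host where RTLIB::Libcall and CallingConv::ID are 4-byte enums and pointers are 8 bytes, and it uses hypothetical stand-in enum types so it compiles on its own.

#include <cstdio>

// Stand-ins for RTLIB::Libcall and CallingConv::ID (assumed 4-byte enums).
enum Libcall : unsigned { SDIVREM_I32 };
enum CallingConvID : unsigned { ARM_AAPCS };

// Field order as written at line 883: 4 bytes of padding after Op (to align
// the 8-byte Name pointer) plus 4 bytes of tail padding -> 24 bytes per entry.
struct AsWritten {
  Libcall Op;
  const char *Name;
  CallingConvID CC;
};

// Order suggested by the analyzer (Name, Op, CC): the two 4-byte enums share
// the 8-byte slot after the pointer -> 16 bytes per entry, no padding.
struct Reordered {
  const char *Name;
  Libcall Op;
  CallingConvID CC;
};

int main() {
  std::printf("as written: %zu bytes, reordered: %zu bytes\n",
              sizeof(AsWritten), sizeof(Reordered)); // typically 24 vs. 16
  return 0;
}

Under those assumptions, reordering the fields (or adding an explicit padding member, as the warning suggests) keeps each entry of the LibraryCalls table at 16 bytes instead of 24.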

Annotated Source Code

1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMISelLowering.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSelectionDAGInfo.h"
24#include "ARMSubtarget.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/BitVector.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/StringExtras.h"
37#include "llvm/ADT/StringSwitch.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/Triple.h"
40#include "llvm/ADT/Twine.h"
41#include "llvm/Analysis/VectorUtils.h"
42#include "llvm/CodeGen/CallingConvLower.h"
43#include "llvm/CodeGen/ISDOpcodes.h"
44#include "llvm/CodeGen/IntrinsicLowering.h"
45#include "llvm/CodeGen/MachineBasicBlock.h"
46#include "llvm/CodeGen/MachineConstantPool.h"
47#include "llvm/CodeGen/MachineFrameInfo.h"
48#include "llvm/CodeGen/MachineFunction.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineJumpTableInfo.h"
52#include "llvm/CodeGen/MachineMemOperand.h"
53#include "llvm/CodeGen/MachineOperand.h"
54#include "llvm/CodeGen/MachineRegisterInfo.h"
55#include "llvm/CodeGen/MachineValueType.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGNodes.h"
59#include "llvm/CodeGen/ValueTypes.h"
60#include "llvm/IR/Attributes.h"
61#include "llvm/IR/CallingConv.h"
62#include "llvm/IR/Constant.h"
63#include "llvm/IR/Constants.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/DataLayout.h"
66#include "llvm/IR/DebugLoc.h"
67#include "llvm/IR/DerivedTypes.h"
68#include "llvm/IR/Function.h"
69#include "llvm/IR/GlobalAlias.h"
70#include "llvm/IR/GlobalValue.h"
71#include "llvm/IR/GlobalVariable.h"
72#include "llvm/IR/IRBuilder.h"
73#include "llvm/IR/InlineAsm.h"
74#include "llvm/IR/Instruction.h"
75#include "llvm/IR/Instructions.h"
76#include "llvm/IR/IntrinsicInst.h"
77#include "llvm/IR/Intrinsics.h"
78#include "llvm/IR/Module.h"
79#include "llvm/IR/Type.h"
80#include "llvm/IR/User.h"
81#include "llvm/IR/Value.h"
82#include "llvm/MC/MCInstrDesc.h"
83#include "llvm/MC/MCInstrItineraries.h"
84#include "llvm/MC/MCRegisterInfo.h"
85#include "llvm/MC/MCSchedule.h"
86#include "llvm/Support/AtomicOrdering.h"
87#include "llvm/Support/BranchProbability.h"
88#include "llvm/Support/Casting.h"
89#include "llvm/Support/CodeGen.h"
90#include "llvm/Support/CommandLine.h"
91#include "llvm/Support/Compiler.h"
92#include "llvm/Support/Debug.h"
93#include "llvm/Support/ErrorHandling.h"
94#include "llvm/Support/MathExtras.h"
95#include "llvm/Support/raw_ostream.h"
96#include "llvm/Target/TargetInstrInfo.h"
97#include "llvm/Target/TargetMachine.h"
98#include "llvm/Target/TargetOptions.h"
99#include <algorithm>
100#include <cassert>
101#include <cstdint>
102#include <cstdlib>
103#include <iterator>
104#include <limits>
105#include <tuple>
106#include <string>
107#include <utility>
108#include <vector>
109
110using namespace llvm;
111
112#define DEBUG_TYPE "arm-isel"
113
114STATISTIC(NumTailCalls, "Number of tail calls");
115STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
116STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
117STATISTIC(NumConstpoolPromoted,
118 "Number of constants with their storage promoted into constant pools");
119
120static cl::opt<bool>
121ARMInterworking("arm-interworking", cl::Hidden,
122 cl::desc("Enable / disable ARM interworking (for debugging only)"),
123 cl::init(true));
124
125static cl::opt<bool> EnableConstpoolPromotion(
126 "arm-promote-constant", cl::Hidden,
127 cl::desc("Enable / disable promotion of unnamed_addr constants into "
128 "constant pools"),
129 cl::init(true));
130static cl::opt<unsigned> ConstpoolPromotionMaxSize(
131 "arm-promote-constant-max-size", cl::Hidden,
132 cl::desc("Maximum size of constant to promote into a constant pool"),
133 cl::init(64));
134static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
135 "arm-promote-constant-max-total", cl::Hidden,
136 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
137 cl::init(128));
138
139// The APCS parameter registers.
140static const MCPhysReg GPRArgRegs[] = {
141 ARM::R0, ARM::R1, ARM::R2, ARM::R3
142};
143
144void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
145 MVT PromotedBitwiseVT) {
146 if (VT != PromotedLdStVT) {
147 setOperationAction(ISD::LOAD, VT, Promote);
148 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
149
150 setOperationAction(ISD::STORE, VT, Promote);
151 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
152 }
153
154 MVT ElemTy = VT.getVectorElementType();
155 if (ElemTy != MVT::f64)
156 setOperationAction(ISD::SETCC, VT, Custom);
157 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
159 if (ElemTy == MVT::i32) {
160 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
161 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
162 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
163 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
164 } else {
165 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
166 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
167 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
168 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
169 }
170 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
171 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
172 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
173 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
174 setOperationAction(ISD::SELECT, VT, Expand);
175 setOperationAction(ISD::SELECT_CC, VT, Expand);
176 setOperationAction(ISD::VSELECT, VT, Expand);
177 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
178 if (VT.isInteger()) {
179 setOperationAction(ISD::SHL, VT, Custom);
180 setOperationAction(ISD::SRA, VT, Custom);
181 setOperationAction(ISD::SRL, VT, Custom);
182 }
183
184 // Promote all bit-wise operations.
185 if (VT.isInteger() && VT != PromotedBitwiseVT) {
186 setOperationAction(ISD::AND, VT, Promote);
187 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
188 setOperationAction(ISD::OR, VT, Promote);
189 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
190 setOperationAction(ISD::XOR, VT, Promote);
191 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
192 }
193
194 // Neon does not support vector divide/remainder operations.
195 setOperationAction(ISD::SDIV, VT, Expand);
196 setOperationAction(ISD::UDIV, VT, Expand);
197 setOperationAction(ISD::FDIV, VT, Expand);
198 setOperationAction(ISD::SREM, VT, Expand);
199 setOperationAction(ISD::UREM, VT, Expand);
200 setOperationAction(ISD::FREM, VT, Expand);
201
202 if (!VT.isFloatingPoint() &&
203 VT != MVT::v2i64 && VT != MVT::v1i64)
204 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
205 setOperationAction(Opcode, VT, Legal);
206}
207
208void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
209 addRegisterClass(VT, &ARM::DPRRegClass);
210 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
211}
212
213void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
214 addRegisterClass(VT, &ARM::DPairRegClass);
215 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
216}
217
218ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
219 const ARMSubtarget &STI)
220 : TargetLowering(TM), Subtarget(&STI) {
221 RegInfo = Subtarget->getRegisterInfo();
222 Itins = Subtarget->getInstrItineraryData();
223
224 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
225
226 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
227 !Subtarget->isTargetWatchOS()) {
228 const auto &E = Subtarget->getTargetTriple().getEnvironment();
229
230 bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
231 E == Triple::MuslEABIHF;
232 // Windows is a special case. Technically, we will replace all of the "GNU"
233 // calls with calls to MSVCRT if appropriate and adjust the calling
234 // convention then.
235 IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
236
237 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
238 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
239 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
240 : CallingConv::ARM_AAPCS);
241 }
242
243 if (Subtarget->isTargetMachO()) {
244 // Uses VFP for Thumb libfuncs if available.
245 if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
246 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
247 static const struct {
248 const RTLIB::Libcall Op;
249 const char * const Name;
250 const ISD::CondCode Cond;
251 } LibraryCalls[] = {
252 // Single-precision floating-point arithmetic.
253 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
254 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
255 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
256 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
257
258 // Double-precision floating-point arithmetic.
259 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
260 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
261 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
262 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
263
264 // Single-precision comparisons.
265 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
266 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
267 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
268 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
269 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
270 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
271 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
272 { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
273
274 // Double-precision comparisons.
275 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
276 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
277 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
278 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
279 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
280 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
281 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
282 { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
283
284 // Floating-point to integer conversions.
285 // i64 conversions are done via library routines even when generating VFP
286 // instructions, so use the same ones.
287 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
288 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
289 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
290 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
291
292 // Conversions between floating types.
293 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
294 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
295
296 // Integer to floating-point conversions.
297 // i64 conversions are done via library routines even when generating VFP
298 // instructions, so use the same ones.
299 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
300 // e.g., __floatunsidf vs. __floatunssidfvfp.
301 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
302 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
303 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
304 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
305 };
306
307 for (const auto &LC : LibraryCalls) {
308 setLibcallName(LC.Op, LC.Name);
309 if (LC.Cond != ISD::SETCC_INVALID)
310 setCmpLibcallCC(LC.Op, LC.Cond);
311 }
312 }
313
314 // Set the correct calling convention for ARMv7k WatchOS. It's just
315 // AAPCS_VFP for functions as simple as libcalls.
316 if (Subtarget->isTargetWatchABI()) {
317 for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
318 setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
319 }
320 }
321
322 // These libcalls are not available in 32-bit.
323 setLibcallName(RTLIB::SHL_I128, nullptr);
324 setLibcallName(RTLIB::SRL_I128, nullptr);
325 setLibcallName(RTLIB::SRA_I128, nullptr);
326
327 // RTLIB
328 if (Subtarget->isAAPCS_ABI() &&
329 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
330 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
331 static const struct {
332 const RTLIB::Libcall Op;
333 const char * const Name;
334 const CallingConv::ID CC;
335 const ISD::CondCode Cond;
336 } LibraryCalls[] = {
337 // Double-precision floating-point arithmetic helper functions
338 // RTABI chapter 4.1.2, Table 2
339 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
342 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
343
344 // Double-precision floating-point comparison helper functions
345 // RTABI chapter 4.1.2, Table 3
346 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
347 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
348 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
349 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
350 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
351 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
352 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
353 { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
354
355 // Single-precision floating-point arithmetic helper functions
356 // RTABI chapter 4.1.2, Table 4
357 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
360 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
361
362 // Single-precision floating-point comparison helper functions
363 // RTABI chapter 4.1.2, Table 5
364 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
365 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
366 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
367 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
368 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
369 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
370 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
371 { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
372
373 // Floating-point to integer conversions.
374 // RTABI chapter 4.1.2, Table 6
375 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
382 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
383
384 // Conversions between floating types.
385 // RTABI chapter 4.1.2, Table 7
386 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389
390 // Integer to floating-point conversions.
391 // RTABI chapter 4.1.2, Table 8
392 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
399 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
400
401 // Long long helper functions
402 // RTABI chapter 4.2, Table 9
403 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407
408 // Integer division functions
409 // RTABI chapter 4.3.1
410 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
417 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
418 };
419
420 for (const auto &LC : LibraryCalls) {
421 setLibcallName(LC.Op, LC.Name);
422 setLibcallCallingConv(LC.Op, LC.CC);
423 if (LC.Cond != ISD::SETCC_INVALID)
424 setCmpLibcallCC(LC.Op, LC.Cond);
425 }
426
427 // EABI dependent RTLIB
428 if (TM.Options.EABIVersion == EABI::EABI4 ||
429 TM.Options.EABIVersion == EABI::EABI5) {
430 static const struct {
431 const RTLIB::Libcall Op;
432 const char *const Name;
433 const CallingConv::ID CC;
434 const ISD::CondCode Cond;
435 } MemOpsLibraryCalls[] = {
436 // Memory operations
437 // RTABI chapter 4.3.4
438 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
440 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
441 };
442
443 for (const auto &LC : MemOpsLibraryCalls) {
444 setLibcallName(LC.Op, LC.Name);
445 setLibcallCallingConv(LC.Op, LC.CC);
446 if (LC.Cond != ISD::SETCC_INVALID)
447 setCmpLibcallCC(LC.Op, LC.Cond);
448 }
449 }
450 }
451
452 if (Subtarget->isTargetWindows()) {
453 static const struct {
454 const RTLIB::Libcall Op;
455 const char * const Name;
456 const CallingConv::ID CC;
457 } LibraryCalls[] = {
458 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
459 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
460 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
461 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
462 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
463 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
464 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
465 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
466 };
467
468 for (const auto &LC : LibraryCalls) {
469 setLibcallName(LC.Op, LC.Name);
470 setLibcallCallingConv(LC.Op, LC.CC);
471 }
472 }
473
474 // Use divmod compiler-rt calls for iOS 5.0 and later.
475 if (Subtarget->isTargetWatchOS() ||
476 (Subtarget->isTargetIOS() &&
477 !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
478 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
479 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
480 }
481
482 // The half <-> float conversion functions are always soft-float on
483 // non-watchos platforms, but are needed for some targets which use a
484 // hard-float calling convention by default.
485 if (!Subtarget->isTargetWatchABI()) {
486 if (Subtarget->isAAPCS_ABI()) {
487 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
488 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
489 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
490 } else {
491 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
492 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
493 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
494 }
495 }
496
497 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
498 // a __gnu_ prefix (which is the default).
499 if (Subtarget->isTargetAEABI()) {
500 static const struct {
501 const RTLIB::Libcall Op;
502 const char * const Name;
503 const CallingConv::ID CC;
504 } LibraryCalls[] = {
505 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
506 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
507 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
508 };
509
510 for (const auto &LC : LibraryCalls) {
511 setLibcallName(LC.Op, LC.Name);
512 setLibcallCallingConv(LC.Op, LC.CC);
513 }
514 }
515
516 if (Subtarget->isThumb1Only())
517 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
518 else
519 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
520
521 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
522 !Subtarget->isThumb1Only()) {
523 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
524 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
525 }
526
527 for (MVT VT : MVT::vector_valuetypes()) {
528 for (MVT InnerVT : MVT::vector_valuetypes()) {
529 setTruncStoreAction(VT, InnerVT, Expand);
530 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
531 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
532 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
533 }
534
535 setOperationAction(ISD::MULHS, VT, Expand);
536 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
537 setOperationAction(ISD::MULHU, VT, Expand);
538 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
539
540 setOperationAction(ISD::BSWAP, VT, Expand);
541 }
542
543 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
544 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
545
546 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
547 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
548
549 if (Subtarget->hasNEON()) {
550 addDRTypeForNEON(MVT::v2f32);
551 addDRTypeForNEON(MVT::v8i8);
552 addDRTypeForNEON(MVT::v4i16);
553 addDRTypeForNEON(MVT::v2i32);
554 addDRTypeForNEON(MVT::v1i64);
555
556 addQRTypeForNEON(MVT::v4f32);
557 addQRTypeForNEON(MVT::v2f64);
558 addQRTypeForNEON(MVT::v16i8);
559 addQRTypeForNEON(MVT::v8i16);
560 addQRTypeForNEON(MVT::v4i32);
561 addQRTypeForNEON(MVT::v2i64);
562
563 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
564 // neither Neon nor VFP support any arithmetic operations on it.
565 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
566 // supported for v4f32.
567 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
568 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
569 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
570 // FIXME: Code duplication: FDIV and FREM are expanded always, see
571 // ARMTargetLowering::addTypeForNEON method for details.
572 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
573 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
574 // FIXME: Create unittest.
575 // In other words, find a case where "copysign" appears in a DAG with vector
576 // operands.
577 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
578 // FIXME: Code duplication: SETCC has custom operation action, see
579 // ARMTargetLowering::addTypeForNEON method for details.
580 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
581 // FIXME: Create unittest for FNEG and for FABS.
582 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
583 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
584 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
585 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
586 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
587 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
588 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
589 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
590 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
591 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
592 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
593 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
594 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
595 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
596 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
597 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
598 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
599 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
600 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
601
602 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
603 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
604 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
605 setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
606 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
607 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
608 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
609 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
610 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
611 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
612 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
613 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
614 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
615 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
616 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
617
618 // Mark v2f32 intrinsics.
619 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
620 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
621 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
622 setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
623 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
624 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
625 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
626 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
627 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
628 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
629 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
630 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
631 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
632 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
633 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
634
635 // Neon does not support some operations on v1i64 and v2i64 types.
636 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
637 // Custom handling for some quad-vector types to detect VMULL.
638 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
639 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
640 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
641 // Custom handling for some vector types to avoid expensive expansions
642 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
643 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
644 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
645 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
646 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
647 // a destination type that is wider than the source, nor does
648 // it have a FP_TO_[SU]INT instruction with a narrower destination than
649 // source.
650 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
651 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
652 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
653 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
654
655 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
656 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
657
658 // NEON does not have single instruction CTPOP for vectors with element
659 // types wider than 8 bits. However, custom lowering can leverage the
660 // v8i8/v16i8 vcnt instruction.
661 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
662 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
663 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
664 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
665 setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
666 setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
667
668 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
669 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
670
671 // NEON does not have single instruction CTTZ for vectors.
672 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
673 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
674 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
675 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
676
677 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
678 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
679 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
680 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
681
682 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
683 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
684 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
685 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
686
687 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
688 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
689 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
690 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
691
692 // NEON only has FMA instructions as of VFP4.
693 if (!Subtarget->hasVFP4()) {
694 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
695 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
696 }
697
698 setTargetDAGCombine(ISD::INTRINSIC_VOID);
699 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
700 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
701 setTargetDAGCombine(ISD::SHL);
702 setTargetDAGCombine(ISD::SRL);
703 setTargetDAGCombine(ISD::SRA);
704 setTargetDAGCombine(ISD::SIGN_EXTEND);
705 setTargetDAGCombine(ISD::ZERO_EXTEND);
706 setTargetDAGCombine(ISD::ANY_EXTEND);
707 setTargetDAGCombine(ISD::BUILD_VECTOR);
708 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
709 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
710 setTargetDAGCombine(ISD::STORE);
711 setTargetDAGCombine(ISD::FP_TO_SINT);
712 setTargetDAGCombine(ISD::FP_TO_UINT);
713 setTargetDAGCombine(ISD::FDIV);
714 setTargetDAGCombine(ISD::LOAD);
715
716 // It is legal to extload from v4i8 to v4i16 or v4i32.
717 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
718 MVT::v2i32}) {
719 for (MVT VT : MVT::integer_vector_valuetypes()) {
720 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
721 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
722 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
723 }
724 }
725 }
726
727 if (Subtarget->isFPOnlySP()) {
728 // When targeting a floating-point unit with only single-precision
729 // operations, f64 is legal for the few double-precision instructions which
730 // are present. However, no double-precision operations other than moves,
731 // loads and stores are provided by the hardware.
732 setOperationAction(ISD::FADD, MVT::f64, Expand);
733 setOperationAction(ISD::FSUB, MVT::f64, Expand);
734 setOperationAction(ISD::FMUL, MVT::f64, Expand);
735 setOperationAction(ISD::FMA, MVT::f64, Expand);
736 setOperationAction(ISD::FDIV, MVT::f64, Expand);
737 setOperationAction(ISD::FREM, MVT::f64, Expand);
738 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
739 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
740 setOperationAction(ISD::FNEG, MVT::f64, Expand);
741 setOperationAction(ISD::FABS, MVT::f64, Expand);
742 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
743 setOperationAction(ISD::FSIN, MVT::f64, Expand);
744 setOperationAction(ISD::FCOS, MVT::f64, Expand);
745 setOperationAction(ISD::FPOWI, MVT::f64, Expand);
746 setOperationAction(ISD::FPOW, MVT::f64, Expand);
747 setOperationAction(ISD::FLOG, MVT::f64, Expand);
748 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
749 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
750 setOperationAction(ISD::FEXP, MVT::f64, Expand);
751 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
752 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
753 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
754 setOperationAction(ISD::FRINT, MVT::f64, Expand);
755 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
756 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
757 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
758 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
759 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
760 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
761 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
762 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
763 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
764 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
765 }
766
767 computeRegisterProperties(Subtarget->getRegisterInfo());
768
769 // ARM does not have floating-point extending loads.
770 for (MVT VT : MVT::fp_valuetypes()) {
771 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
772 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
773 }
774
775 // ... or truncating stores
776 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
777 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
778 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
779
780 // ARM does not have i1 sign extending load.
781 for (MVT VT : MVT::integer_valuetypes())
782 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
783
784 // ARM supports all 4 flavors of integer indexed load / store.
785 if (!Subtarget->isThumb1Only()) {
786 for (unsigned im = (unsigned)ISD::PRE_INC;
787 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
788 setIndexedLoadAction(im, MVT::i1, Legal);
789 setIndexedLoadAction(im, MVT::i8, Legal);
790 setIndexedLoadAction(im, MVT::i16, Legal);
791 setIndexedLoadAction(im, MVT::i32, Legal);
792 setIndexedStoreAction(im, MVT::i1, Legal);
793 setIndexedStoreAction(im, MVT::i8, Legal);
794 setIndexedStoreAction(im, MVT::i16, Legal);
795 setIndexedStoreAction(im, MVT::i32, Legal);
796 }
797 } else {
798 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
799 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
800 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
801 }
802
803 setOperationAction(ISD::SADDO, MVT::i32, Custom);
804 setOperationAction(ISD::UADDO, MVT::i32, Custom);
805 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
806 setOperationAction(ISD::USUBO, MVT::i32, Custom);
807
808 // i64 operation support.
809 setOperationAction(ISD::MUL, MVT::i64, Expand);
810 setOperationAction(ISD::MULHU, MVT::i32, Expand);
811 if (Subtarget->isThumb1Only()) {
812 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
813 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
814 }
815 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
816 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
817 setOperationAction(ISD::MULHS, MVT::i32, Expand);
818
819 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
820 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
821 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
822 setOperationAction(ISD::SRL, MVT::i64, Custom);
823 setOperationAction(ISD::SRA, MVT::i64, Custom);
824
825 setOperationAction(ISD::ADDC, MVT::i32, Custom);
826 setOperationAction(ISD::ADDE, MVT::i32, Custom);
827 setOperationAction(ISD::SUBC, MVT::i32, Custom);
828 setOperationAction(ISD::SUBE, MVT::i32, Custom);
829
830 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
831 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
832
833 // ARM does not have ROTL.
834 setOperationAction(ISD::ROTL, MVT::i32, Expand);
835 for (MVT VT : MVT::vector_valuetypes()) {
836 setOperationAction(ISD::ROTL, VT, Expand);
837 setOperationAction(ISD::ROTR, VT, Expand);
838 }
839 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
840 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
841 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
842 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
843
844 // @llvm.readcyclecounter requires the Performance Monitors extension.
845 // Default to the 0 expansion on unsupported platforms.
846 // FIXME: Technically there are older ARM CPUs that have
847 // implementation-specific ways of obtaining this information.
848 if (Subtarget->hasPerfMon())
849 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
850
851 // Only ARMv6 has BSWAP.
852 if (!Subtarget->hasV6Ops())
853 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
854
855 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivideInThumbMode()
856 : Subtarget->hasDivideInARMMode();
857 if (!hasDivide) {
858 // These are expanded into libcalls if the cpu doesn't have HW divider.
859 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
860 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
861 }
862
863 if (Subtarget->isTargetWindows() && !Subtarget->hasDivideInThumbMode()) {
864 setOperationAction(ISD::SDIV, MVT::i32, Custom);
865 setOperationAction(ISD::UDIV, MVT::i32, Custom);
866
867 setOperationAction(ISD::SDIV, MVT::i64, Custom);
868 setOperationAction(ISD::UDIV, MVT::i64, Custom);
869 }
870
871 setOperationAction(ISD::SREM, MVT::i32, Expand);
872 setOperationAction(ISD::UREM, MVT::i32, Expand);
873
874 // Register based DivRem for AEABI (RTABI 4.2)
875 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
876 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
877 Subtarget->isTargetWindows()) {
878 setOperationAction(ISD::SREM, MVT::i64, Custom);
879 setOperationAction(ISD::UREM, MVT::i64, Custom);
880 HasStandaloneRem = false;
881
882 if (Subtarget->isTargetWindows()) {
883 const struct {
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn301135/lib/Target/ARM/ARMISelLowering.cpp:883:13)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, CC, consider reordering the fields or adding explicit padding members
884 const RTLIB::Libcall Op;
885 const char * const Name;
886 const CallingConv::ID CC;
887 } LibraryCalls[] = {
888 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
889 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
890 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
891 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
892
893 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
894 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
895 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
896 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
897 };
898
899 for (const auto &LC : LibraryCalls) {
900 setLibcallName(LC.Op, LC.Name);
901 setLibcallCallingConv(LC.Op, LC.CC);
902 }
903 } else {
904 const struct {
905 const RTLIB::Libcall Op;
906 const char * const Name;
907 const CallingConv::ID CC;
908 } LibraryCalls[] = {
909 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
910 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
911 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
912 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
913
914 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
915 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
916 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
917 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
918 };
919
920 for (const auto &LC : LibraryCalls) {
921 setLibcallName(LC.Op, LC.Name);
922 setLibcallCallingConv(LC.Op, LC.CC);
923 }
924 }
925
926 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
927 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
928 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
929 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
930 } else {
931 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
932 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
933 }
934
935 if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
936 for (auto &VT : {MVT::f32, MVT::f64})
937 setOperationAction(ISD::FPOWI, VT, Custom);
938
939 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
940 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
941 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
942 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
943
944 setOperationAction(ISD::TRAP, MVT::Other, Legal);
945
946 // Use the default implementation.
947 setOperationAction(ISD::VASTART, MVT::Other, Custom);
948 setOperationAction(ISD::VAARG, MVT::Other, Expand);
949 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
950 setOperationAction(ISD::VAEND, MVT::Other, Expand);
951 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
952 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
953
954 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
955 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
956 else
957 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
958
959 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
960 // the default expansion.
961 InsertFencesForAtomic = false;
962 if (Subtarget->hasAnyDataBarrier() &&
963 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
964 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
965 // to ldrex/strex loops already.
966 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
967 if (!Subtarget->isThumb() || !Subtarget->isMClass())
968 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
969
970 // On v8, we have particularly efficient implementations of atomic fences
971 // if they can be combined with nearby atomic loads and stores.
972 if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
973 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
974 InsertFencesForAtomic = true;
975 }
976 } else {
977 // If there's anything we can use as a barrier, go through custom lowering
978 // for ATOMIC_FENCE.
979 // If target has DMB in thumb, Fences can be inserted.
980 if (Subtarget->hasDataBarrier())
981 InsertFencesForAtomic = true;
982
983 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
984 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
985
986 // Set them all for expansion, which will force libcalls.
987 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
988 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
989 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
990 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
991 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
992 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
993 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
994 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
995 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
996 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
997 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
998 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
999 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1000 // Unordered/Monotonic case.
1001 if (!InsertFencesForAtomic) {
1002 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1003 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1004 }
1005 }
1006
1007 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1008
1009 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1010 if (!Subtarget->hasV6Ops()) {
1011 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1012 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1013 }
1014 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1015
1016 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1017 !Subtarget->isThumb1Only()) {
1018 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1019 // iff target supports vfp2.
1020 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1021 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1022 }
1023
1024 // We want to custom lower some of our intrinsics.
1025 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1026 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1027 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1028 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1029 if (Subtarget->useSjLjEH())
1030 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1031
1032 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1033 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1034 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1035 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1036 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1037 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1038 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1039 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1040 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1041
1042 // Thumb-1 cannot currently select ARMISD::SUBE.
1043 if (!Subtarget->isThumb1Only())
1044 setOperationAction(ISD::SETCCE, MVT::i32, Custom);
1045
1046 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
1047 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1048 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1049 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1050 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1051
1052 // We don't support sin/cos/fmod/copysign/pow
1053 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1054 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1055 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1056 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1057 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1058 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1059 setOperationAction(ISD::FREM, MVT::f64, Expand);
1060 setOperationAction(ISD::FREM, MVT::f32, Expand);
1061 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1062 !Subtarget->isThumb1Only()) {
1063 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1064 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1065 }
1066 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1067 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1068
1069 if (!Subtarget->hasVFP4()) {
1070 setOperationAction(ISD::FMA, MVT::f64, Expand);
1071 setOperationAction(ISD::FMA, MVT::f32, Expand);
1072 }
1073
1074 // Various VFP goodness
1075 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1076 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1077 if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1078 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1079 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1080 }
1081
1082 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1083 if (!Subtarget->hasFP16()) {
1084 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1085 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1086 }
1087 }
1088
1089 // Combine sin / cos into one node or libcall if possible.
1090 if (Subtarget->hasSinCos()) {
1091 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1092 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1093 if (Subtarget->isTargetWatchABI()) {
1094 setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
1095 setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
1096 }
1097 if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1098 // For iOS, we don't want the normal expansion of a libcall to
1099 // sincos. We want to issue a libcall to __sincos_stret.
1100 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1101 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1102 }
1103 }
1104
1105 // FP-ARMv8 implements a lot of rounding-like FP operations.
1106 if (Subtarget->hasFPARMv8()) {
1107 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1108 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1109 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1110 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1111 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1112 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1113 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1114 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1115 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1116 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1117 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1118 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1119
1120 if (!Subtarget->isFPOnlySP()) {
1121 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1122 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1123 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1124 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1125 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1126 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1127 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1128 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1129 }
1130 }
1131
1132 if (Subtarget->hasNEON()) {
1133 // vmin and vmax aren't available in a scalar form, so we use
1134 // a NEON instruction with an undef lane instead.
1135 setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
1136 setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
1137 setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
1138 setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
1139 setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
1140 setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
1141 }
1142
1143 // We have target-specific dag combine patterns for the following nodes:
1144 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1145 setTargetDAGCombine(ISD::ADD);
1146 setTargetDAGCombine(ISD::SUB);
1147 setTargetDAGCombine(ISD::MUL);
1148 setTargetDAGCombine(ISD::AND);
1149 setTargetDAGCombine(ISD::OR);
1150 setTargetDAGCombine(ISD::XOR);
1151
1152 if (Subtarget->hasV6Ops())
1153 setTargetDAGCombine(ISD::SRL);
1154
1155 setStackPointerRegisterToSaveRestore(ARM::SP);
1156
1157 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1158 !Subtarget->hasVFP2())
1159 setSchedulingPreference(Sched::RegPressure);
1160 else
1161 setSchedulingPreference(Sched::Hybrid);
1162
1163 //// temporary - rewrite interface to use type
1164 MaxStoresPerMemset = 8;
1165 MaxStoresPerMemsetOptSize = 4;
1166 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1167 MaxStoresPerMemcpyOptSize = 2;
1168 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1169 MaxStoresPerMemmoveOptSize = 2;
1170
1171 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1172 // are at least 4 bytes aligned.
1173 setMinStackArgumentAlignment(4);
1174
1175 // Prefer likely predicted branches to selects on out-of-order cores.
1176 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1177
1178 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1179}
1180
1181bool ARMTargetLowering::useSoftFloat() const {
1182 return Subtarget->useSoftFloat();
1183}
1184
1185// FIXME: It might make sense to define the representative register class as the
1186// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1187// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1188// SPR's representative would be DPR_VFP2. This should work well if register
1189// pressure tracking were modified such that a register use would increment the
1190// pressure of the register class's representative and all of its super
1191// classes' representatives transitively. We have not implemented this because
1192// of the difficulty prior to coalescing of modeling operand register classes
1193// due to the common occurrence of cross class copies and subregister insertions
1194// and extractions.
1195std::pair<const TargetRegisterClass *, uint8_t>
1196ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1197 MVT VT) const {
1198 const TargetRegisterClass *RRC = nullptr;
1199 uint8_t Cost = 1;
1200 switch (VT.SimpleTy) {
1201 default:
1202 return TargetLowering::findRepresentativeClass(TRI, VT);
1203 // Use DPR as representative register class for all floating point
1204 // and vector types. Since there are 32 SPR registers and 32 DPR registers,
1205 // the cost is 1 for both f32 and f64.
1206 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1207 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1208 RRC = &ARM::DPRRegClass;
1209 // When NEON is used for SP, only half of the register file is available
1210 // because operations that define both SP and DP results will be constrained
1211 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1212 // coalescing by double-counting the SP regs. See the FIXME above.
1213 if (Subtarget->useNEONForSinglePrecisionFP())
1214 Cost = 2;
1215 break;
1216 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1217 case MVT::v4f32: case MVT::v2f64:
1218 RRC = &ARM::DPRRegClass;
1219 Cost = 2;
1220 break;
1221 case MVT::v4i64:
1222 RRC = &ARM::DPRRegClass;
1223 Cost = 4;
1224 break;
1225 case MVT::v8i64:
1226 RRC = &ARM::DPRRegClass;
1227 Cost = 8;
1228 break;
1229 }
1230 return std::make_pair(RRC, Cost);
1231}
1232
1233const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1234 switch ((ARMISD::NodeType)Opcode) {
1235 case ARMISD::FIRST_NUMBER: break;
1236 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1237 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1238 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1239 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1240 case ARMISD::CALL: return "ARMISD::CALL";
1241 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1242 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1243 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1244 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1245 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1246 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1247 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1248 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1249 case ARMISD::CMP: return "ARMISD::CMP";
1250 case ARMISD::CMN: return "ARMISD::CMN";
1251 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1252 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1253 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1254 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1255 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1256
1257 case ARMISD::CMOV: return "ARMISD::CMOV";
1258
1259 case ARMISD::SSAT: return "ARMISD::SSAT";
1260
1261 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1262 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1263 case ARMISD::RRX: return "ARMISD::RRX";
1264
1265 case ARMISD::ADDC: return "ARMISD::ADDC";
1266 case ARMISD::ADDE: return "ARMISD::ADDE";
1267 case ARMISD::SUBC: return "ARMISD::SUBC";
1268 case ARMISD::SUBE: return "ARMISD::SUBE";
1269
1270 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1271 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1272
1273 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1274 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1275 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1276
1277 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1278
1279 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1280
1281 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1282
1283 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1284
1285 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1286
1287 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1288 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1289
1290 case ARMISD::VCEQ: return "ARMISD::VCEQ";
1291 case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1292 case ARMISD::VCGE: return "ARMISD::VCGE";
1293 case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1294 case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1295 case ARMISD::VCGEU: return "ARMISD::VCGEU";
1296 case ARMISD::VCGT: return "ARMISD::VCGT";
1297 case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1298 case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1299 case ARMISD::VCGTU: return "ARMISD::VCGTU";
1300 case ARMISD::VTST: return "ARMISD::VTST";
1301
1302 case ARMISD::VSHL: return "ARMISD::VSHL";
1303 case ARMISD::VSHRs: return "ARMISD::VSHRs";
1304 case ARMISD::VSHRu: return "ARMISD::VSHRu";
1305 case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1306 case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1307 case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1308 case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1309 case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1310 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1311 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1312 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1313 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1314 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1315 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1316 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1317 case ARMISD::VSLI: return "ARMISD::VSLI";
1318 case ARMISD::VSRI: return "ARMISD::VSRI";
1319 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1320 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1321 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1322 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1323 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1324 case ARMISD::VDUP: return "ARMISD::VDUP";
1325 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1326 case ARMISD::VEXT: return "ARMISD::VEXT";
1327 case ARMISD::VREV64: return "ARMISD::VREV64";
1328 case ARMISD::VREV32: return "ARMISD::VREV32";
1329 case ARMISD::VREV16: return "ARMISD::VREV16";
1330 case ARMISD::VZIP: return "ARMISD::VZIP";
1331 case ARMISD::VUZP: return "ARMISD::VUZP";
1332 case ARMISD::VTRN: return "ARMISD::VTRN";
1333 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1334 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1335 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1336 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1337 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1338 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1339 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1340 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1341 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1342 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1343 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1344 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1345 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1346 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1347 case ARMISD::BFI: return "ARMISD::BFI";
1348 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1349 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1350 case ARMISD::VBSL: return "ARMISD::VBSL";
1351 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1352 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1353 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1354 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1355 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1356 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1357 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1358 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1359 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1360 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1361 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1362 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1363 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1364 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1365 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1366 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1367 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1368 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1369 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1370 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1371 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1372 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1373 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1374 }
1375 return nullptr;
1376}
1377
1378EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1379 EVT VT) const {
1380 if (!VT.isVector())
1381 return getPointerTy(DL);
1382 return VT.changeVectorElementTypeToInteger();
1383}
1384
1385/// getRegClassFor - Return the register class that should be used for the
1386/// specified value type.
1387const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1388 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1389 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1390 // load / store 4 to 8 consecutive D registers.
1391 if (Subtarget->hasNEON()) {
1392 if (VT == MVT::v4i64)
1393 return &ARM::QQPRRegClass;
1394 if (VT == MVT::v8i64)
1395 return &ARM::QQQQPRRegClass;
1396 }
1397 return TargetLowering::getRegClassFor(VT);
1398}
1399
1400 // memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1401// source/dest is aligned and the copy size is large enough. We therefore want
1402// to align such objects passed to memory intrinsics.
1403bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1404 unsigned &PrefAlign) const {
1405 if (!isa<MemIntrinsic>(CI))
1406 return false;
1407 MinSize = 8;
1408 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1409 // cycle faster than 4-byte aligned LDM.
1410 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1411 return true;
1412}
1413
1414// Create a fast isel object.
1415FastISel *
1416ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1417 const TargetLibraryInfo *libInfo) const {
1418 return ARM::createFastISel(funcInfo, libInfo);
1419}
1420
1421Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1422 unsigned NumVals = N->getNumValues();
1423 if (!NumVals)
1424 return Sched::RegPressure;
1425
1426 for (unsigned i = 0; i != NumVals; ++i) {
1427 EVT VT = N->getValueType(i);
1428 if (VT == MVT::Glue || VT == MVT::Other)
1429 continue;
1430 if (VT.isFloatingPoint() || VT.isVector())
1431 return Sched::ILP;
1432 }
1433
1434 if (!N->isMachineOpcode())
1435 return Sched::RegPressure;
1436
1437 // Loads are scheduled for latency even if the instruction itinerary
1438 // is not available.
1439 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1440 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1441
1442 if (MCID.getNumDefs() == 0)
1443 return Sched::RegPressure;
1444 if (!Itins->isEmpty() &&
1445 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1446 return Sched::ILP;
1447
1448 return Sched::RegPressure;
1449}
1450
1451//===----------------------------------------------------------------------===//
1452// Lowering Code
1453//===----------------------------------------------------------------------===//
1454
1455static bool isSRL16(const SDValue &Op) {
1456 if (Op.getOpcode() != ISD::SRL)
1457 return false;
1458 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1459 return Const->getZExtValue() == 16;
1460 return false;
1461}
1462
1463static bool isSRA16(const SDValue &Op) {
1464 if (Op.getOpcode() != ISD::SRA)
1465 return false;
1466 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1467 return Const->getZExtValue() == 16;
1468 return false;
1469}
1470
1471static bool isSHL16(const SDValue &Op) {
1472 if (Op.getOpcode() != ISD::SHL)
1473 return false;
1474 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1475 return Const->getZExtValue() == 16;
1476 return false;
1477}
1478
1479 // Check for a signed 16-bit value. We special case SRA because it makes
1480 // things simpler when also looking for SRAs that aren't sign extending a
1481// smaller value. Without the check, we'd need to take extra care with
1482// checking order for some operations.
1483static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1484 if (isSRA16(Op))
1485 return isSHL16(Op.getOperand(0));
1486 return DAG.ComputeNumSignBits(Op) == 17;
1487}
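// Illustrative sketch (editorial addition, not part of ARMISelLowering.cpp):
// "17 sign bits" in an i32 is equivalent to the value fitting in the signed
// 16-bit range, which is what isS16 ultimately tests via ComputeNumSignBits.
#include <cstdint>
static int numSignBits32(int32_t V) {
  uint32_t U = static_cast<uint32_t>(V);
  int N = 1;
  for (int B = 30; B >= 0 && ((U >> B) & 1) == (U >> 31); --B)
    ++N;
  return N;
}
// numSignBits32(32767) == 17 while numSignBits32(32768) == 16, matching the
// [-32768, 32767] range of a signed 16-bit value.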
1488
1489/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1490static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1491 switch (CC) {
1492 default: llvm_unreachable("Unknown condition code!");
1493 case ISD::SETNE: return ARMCC::NE;
1494 case ISD::SETEQ: return ARMCC::EQ;
1495 case ISD::SETGT: return ARMCC::GT;
1496 case ISD::SETGE: return ARMCC::GE;
1497 case ISD::SETLT: return ARMCC::LT;
1498 case ISD::SETLE: return ARMCC::LE;
1499 case ISD::SETUGT: return ARMCC::HI;
1500 case ISD::SETUGE: return ARMCC::HS;
1501 case ISD::SETULT: return ARMCC::LO;
1502 case ISD::SETULE: return ARMCC::LS;
1503 }
1504}
1505
1506/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1507static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1508 ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1509 CondCode2 = ARMCC::AL;
1510 InvalidOnQNaN = true;
1511 switch (CC) {
1512 default: llvm_unreachable("Unknown FP condition!");
1513 case ISD::SETEQ:
1514 case ISD::SETOEQ:
1515 CondCode = ARMCC::EQ;
1516 InvalidOnQNaN = false;
1517 break;
1518 case ISD::SETGT:
1519 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1520 case ISD::SETGE:
1521 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1522 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1523 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1524 case ISD::SETONE:
1525 CondCode = ARMCC::MI;
1526 CondCode2 = ARMCC::GT;
1527 InvalidOnQNaN = false;
1528 break;
1529 case ISD::SETO: CondCode = ARMCC::VC; break;
1530 case ISD::SETUO: CondCode = ARMCC::VS; break;
1531 case ISD::SETUEQ:
1532 CondCode = ARMCC::EQ;
1533 CondCode2 = ARMCC::VS;
1534 InvalidOnQNaN = false;
1535 break;
1536 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1537 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1538 case ISD::SETLT:
1539 case ISD::SETULT: CondCode = ARMCC::LT; break;
1540 case ISD::SETLE:
1541 case ISD::SETULE: CondCode = ARMCC::LE; break;
1542 case ISD::SETNE:
1543 case ISD::SETUNE:
1544 CondCode = ARMCC::NE;
1545 InvalidOnQNaN = false;
1546 break;
1547 }
1548}
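// Illustrative usage (editorial addition, not part of ARMISelLowering.cpp): some
// ordered/unordered comparisons need two ARM conditions. A hypothetical caller
// of the helper above for ISD::SETONE would see:
//
//   ARMCC::CondCodes CondCode, CondCode2;
//   bool InvalidOnQNaN;
//   FPCCToARMCC(ISD::SETONE, CondCode, CondCode2, InvalidOnQNaN);
//   // CondCode == ARMCC::MI, CondCode2 == ARMCC::GT, so a second predicated
//   // instruction must be emitted whenever CondCode2 != ARMCC::AL.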
1549
1550//===----------------------------------------------------------------------===//
1551// Calling Convention Implementation
1552//===----------------------------------------------------------------------===//
1553
1554#include "ARMGenCallingConv.inc"
1555
1556/// getEffectiveCallingConv - Get the effective calling convention, taking into
1557/// account presence of floating point hardware and calling convention
1558/// limitations, such as support for variadic functions.
1559CallingConv::ID
1560ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1561 bool isVarArg) const {
1562 switch (CC) {
1563 default:
1564 llvm_unreachable("Unsupported calling convention");
1565 case CallingConv::ARM_AAPCS:
1566 case CallingConv::ARM_APCS:
1567 case CallingConv::GHC:
1568 return CC;
1569 case CallingConv::PreserveMost:
1570 return CallingConv::PreserveMost;
1571 case CallingConv::ARM_AAPCS_VFP:
1572 case CallingConv::Swift:
1573 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1574 case CallingConv::C:
1575 if (!Subtarget->isAAPCS_ABI())
1576 return CallingConv::ARM_APCS;
1577 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1578 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1579 !isVarArg)
1580 return CallingConv::ARM_AAPCS_VFP;
1581 else
1582 return CallingConv::ARM_AAPCS;
1583 case CallingConv::Fast:
1584 case CallingConv::CXX_FAST_TLS:
1585 if (!Subtarget->isAAPCS_ABI()) {
1586 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1587 return CallingConv::Fast;
1588 return CallingConv::ARM_APCS;
1589 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1590 return CallingConv::ARM_AAPCS_VFP;
1591 else
1592 return CallingConv::ARM_AAPCS;
1593 }
1594}
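// Illustrative sketch (editorial addition, not part of ARMISelLowering.cpp):
// a standalone mirror of the CallingConv::C branch above, showing when the
// hard-float AAPCS variant is selected.
enum class EffectiveCCForC { APCS, AAPCS, AAPCS_VFP };
static EffectiveCCForC effectiveCCForC(bool IsAAPCS, bool HasVFP2NotThumb1,
                                       bool HardFloatABI, bool IsVarArg) {
  if (!IsAAPCS)
    return EffectiveCCForC::APCS;
  if (HasVFP2NotThumb1 && HardFloatABI && !IsVarArg)
    return EffectiveCCForC::AAPCS_VFP;
  return EffectiveCCForC::AAPCS;
}
// effectiveCCForC(true, true, true, /*IsVarArg=*/true) yields AAPCS: variadic
// calls always fall back to the integer-register argument passing.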
1595
1596CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1597 bool isVarArg) const {
1598 return CCAssignFnForNode(CC, false, isVarArg);
1599}
1600
1601CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1602 bool isVarArg) const {
1603 return CCAssignFnForNode(CC, true, isVarArg);
1604}
1605
1606/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1607/// CallingConvention.
1608CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1609 bool Return,
1610 bool isVarArg) const {
1611 switch (getEffectiveCallingConv(CC, isVarArg)) {
1612 default:
1613 llvm_unreachable("Unsupported calling convention");
1614 case CallingConv::ARM_APCS:
1615 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1616 case CallingConv::ARM_AAPCS:
1617 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1618 case CallingConv::ARM_AAPCS_VFP:
1619 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1620 case CallingConv::Fast:
1621 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1622 case CallingConv::GHC:
1623 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1624 case CallingConv::PreserveMost:
1625 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1626 }
1627}
1628
1629/// LowerCallResult - Lower the result values of a call into the
1630/// appropriate copies out of appropriate physical registers.
1631SDValue ARMTargetLowering::LowerCallResult(
1632 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1633 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1634 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1635 SDValue ThisVal) const {
1636
1637 // Assign locations to each value returned by this call.
1638 SmallVector<CCValAssign, 16> RVLocs;
1639 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1640 *DAG.getContext());
1641 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1642
1643 // Copy all of the result registers out of their specified physreg.
1644 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1645 CCValAssign VA = RVLocs[i];
1646
1647 // Pass 'this' value directly from the argument to return value, to avoid
1648 // reg unit interference
1649 if (i == 0 && isThisReturn) {
1650 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1651 "unexpected return calling convention register assignment");
1652 InVals.push_back(ThisVal);
1653 continue;
1654 }
1655
1656 SDValue Val;
1657 if (VA.needsCustom()) {
1658 // Handle f64 or half of a v2f64.
1659 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1660 InFlag);
1661 Chain = Lo.getValue(1);
1662 InFlag = Lo.getValue(2);
1663 VA = RVLocs[++i]; // skip ahead to next loc
1664 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1665 InFlag);
1666 Chain = Hi.getValue(1);
1667 InFlag = Hi.getValue(2);
1668 if (!Subtarget->isLittle())
1669 std::swap (Lo, Hi);
1670 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1671
1672 if (VA.getLocVT() == MVT::v2f64) {
1673 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1674 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1675 DAG.getConstant(0, dl, MVT::i32));
1676
1677 VA = RVLocs[++i]; // skip ahead to next loc
1678 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1679 Chain = Lo.getValue(1);
1680 InFlag = Lo.getValue(2);
1681 VA = RVLocs[++i]; // skip ahead to next loc
1682 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1683 Chain = Hi.getValue(1);
1684 InFlag = Hi.getValue(2);
1685 if (!Subtarget->isLittle())
1686 std::swap (Lo, Hi);
1687 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1688 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1689 DAG.getConstant(1, dl, MVT::i32));
1690 }
1691 } else {
1692 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1693 InFlag);
1694 Chain = Val.getValue(1);
1695 InFlag = Val.getValue(2);
1696 }
1697
1698 switch (VA.getLocInfo()) {
1699 default: llvm_unreachable("Unknown loc info!");
1700 case CCValAssign::Full: break;
1701 case CCValAssign::BCvt:
1702 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1703 break;
1704 }
1705
1706 InVals.push_back(Val);
1707 }
1708
1709 return Chain;
1710}
1711
1712/// LowerMemOpCallTo - Store the argument to the stack.
1713SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1714 SDValue Arg, const SDLoc &dl,
1715 SelectionDAG &DAG,
1716 const CCValAssign &VA,
1717 ISD::ArgFlagsTy Flags) const {
1718 unsigned LocMemOffset = VA.getLocMemOffset();
1719 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1720 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1721 StackPtr, PtrOff);
1722 return DAG.getStore(
1723 Chain, dl, Arg, PtrOff,
1724 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1725}
1726
1727void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1728 SDValue Chain, SDValue &Arg,
1729 RegsToPassVector &RegsToPass,
1730 CCValAssign &VA, CCValAssign &NextVA,
1731 SDValue &StackPtr,
1732 SmallVectorImpl<SDValue> &MemOpChains,
1733 ISD::ArgFlagsTy Flags) const {
1734
1735 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1736 DAG.getVTList(MVT::i32, MVT::i32), Arg);
1737 unsigned id = Subtarget->isLittle() ? 0 : 1;
1738 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1739
1740 if (NextVA.isRegLoc())
1741 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1742 else {
1743 assert(NextVA.isMemLoc());
1744 if (!StackPtr.getNode())
1745 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1746 getPointerTy(DAG.getDataLayout()));
1747
1748 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1749 dl, DAG, NextVA,
1750 Flags));
1751 }
1752}
1753
1754/// LowerCall - Lowering a call into a callseq_start <-
1755 /// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1756/// nodes.
1757SDValue
1758ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1759 SmallVectorImpl<SDValue> &InVals) const {
1760 SelectionDAG &DAG = CLI.DAG;
1761 SDLoc &dl = CLI.DL;
1762 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1763 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1764 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1765 SDValue Chain = CLI.Chain;
1766 SDValue Callee = CLI.Callee;
1767 bool &isTailCall = CLI.IsTailCall;
1768 CallingConv::ID CallConv = CLI.CallConv;
1769 bool doesNotRet = CLI.DoesNotReturn;
1770 bool isVarArg = CLI.IsVarArg;
1771
1772 MachineFunction &MF = DAG.getMachineFunction();
1773 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1774 bool isThisReturn = false;
1775 bool isSibCall = false;
1776 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1777
1778 // Disable tail calls if they're not supported.
1779 if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1780 isTailCall = false;
1781
1782 if (isTailCall) {
1783 // Check if it's really possible to do a tail call.
1784 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1785 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1786 Outs, OutVals, Ins, DAG);
1787 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1788 report_fatal_error("failed to perform tail call elimination on a call "
1789 "site marked musttail");
1790 // We don't support GuaranteedTailCallOpt for ARM, only automatically
1791 // detected sibcalls.
1792 if (isTailCall) {
1793 ++NumTailCalls;
1794 isSibCall = true;
1795 }
1796 }
1797
1798 // Analyze operands of the call, assigning locations to each operand.
1799 SmallVector<CCValAssign, 16> ArgLocs;
1800 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1801 *DAG.getContext());
1802 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1803
1804 // Get a count of how many bytes are to be pushed on the stack.
1805 unsigned NumBytes = CCInfo.getNextStackOffset();
1806
1807 // For tail calls, memory operands are available in our caller's stack.
1808 if (isSibCall)
1809 NumBytes = 0;
1810
1811 // Adjust the stack pointer for the new arguments...
1812 // These operations are automatically eliminated by the prolog/epilog pass
1813 if (!isSibCall)
1814 Chain = DAG.getCALLSEQ_START(Chain,
1815 DAG.getIntPtrConstant(NumBytes, dl, true), dl);
1816
1817 SDValue StackPtr =
1818 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1819
1820 RegsToPassVector RegsToPass;
1821 SmallVector<SDValue, 8> MemOpChains;
1822
1823 // Walk the register/memloc assignments, inserting copies/loads. In the case
1824 // of tail call optimization, arguments are handled later.
1825 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1826 i != e;
1827 ++i, ++realArgIdx) {
1828 CCValAssign &VA = ArgLocs[i];
1829 SDValue Arg = OutVals[realArgIdx];
1830 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1831 bool isByVal = Flags.isByVal();
1832
1833 // Promote the value if needed.
1834 switch (VA.getLocInfo()) {
1835 default: llvm_unreachable("Unknown loc info!");
1836 case CCValAssign::Full: break;
1837 case CCValAssign::SExt:
1838 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1839 break;
1840 case CCValAssign::ZExt:
1841 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1842 break;
1843 case CCValAssign::AExt:
1844 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1845 break;
1846 case CCValAssign::BCvt:
1847 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1848 break;
1849 }
1850
1851 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1852 if (VA.needsCustom()) {
1853 if (VA.getLocVT() == MVT::v2f64) {
1854 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1855 DAG.getConstant(0, dl, MVT::i32));
1856 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1857 DAG.getConstant(1, dl, MVT::i32));
1858
1859 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1860 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1861
1862 VA = ArgLocs[++i]; // skip ahead to next loc
1863 if (VA.isRegLoc()) {
1864 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1865 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1866 } else {
1867 assert(VA.isMemLoc());
1868
1869 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1870 dl, DAG, VA, Flags));
1871 }
1872 } else {
1873 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1874 StackPtr, MemOpChains, Flags);
1875 }
1876 } else if (VA.isRegLoc()) {
1877 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1878 Outs[0].VT == MVT::i32) {
1879 assert(VA.getLocVT() == MVT::i32 &&
1880 "unexpected calling convention register assignment");
1881 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1882 "unexpected use of 'returned'");
1883 isThisReturn = true;
1884 }
1885 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1886 } else if (isByVal) {
1887 assert(VA.isMemLoc());
1888 unsigned offset = 0;
1889
1890 // True if this byval aggregate will be split between registers
1891 // and memory.
1892 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1893 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1894
1895 if (CurByValIdx < ByValArgsCount) {
1896
1897 unsigned RegBegin, RegEnd;
1898 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1899
1900 EVT PtrVT =
1901 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1902 unsigned int i, j;
1903 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1904 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1905 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1906 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1907 MachinePointerInfo(),
1908 DAG.InferPtrAlignment(AddArg));
1909 MemOpChains.push_back(Load.getValue(1));
1910 RegsToPass.push_back(std::make_pair(j, Load));
1911 }
1912
1913 // If the parameter size extends beyond the register area, the "offset" value
1914 // helps us to calculate the stack slot for the remaining part properly.
1915 offset = RegEnd - RegBegin;
1916
1917 CCInfo.nextInRegsParam();
1918 }
1919
1920 if (Flags.getByValSize() > 4*offset) {
1921 auto PtrVT = getPointerTy(DAG.getDataLayout());
1922 unsigned LocMemOffset = VA.getLocMemOffset();
1923 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1924 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1925 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1926 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1927 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1928 MVT::i32);
1929 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1930 MVT::i32);
1931
1932 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1933 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1934 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1935 Ops));
1936 }
1937 } else if (!isSibCall) {
1938 assert(VA.isMemLoc());
1939
1940 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1941 dl, DAG, VA, Flags));
1942 }
1943 }
1944
1945 if (!MemOpChains.empty())
1946 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1947
1948 // Build a sequence of copy-to-reg nodes chained together with token chain
1949 // and flag operands which copy the outgoing args into the appropriate regs.
1950 SDValue InFlag;
1951 // Tail call byval lowering might overwrite argument registers so in case of
1952 // tail call optimization the copies to registers are lowered later.
1953 if (!isTailCall)
1954 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1955 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1956 RegsToPass[i].second, InFlag);
1957 InFlag = Chain.getValue(1);
1958 }
1959
1960 // For tail calls lower the arguments to the 'real' stack slot.
1961 if (isTailCall) {
1962 // Force all the incoming stack arguments to be loaded from the stack
1963 // before any new outgoing arguments are stored to the stack, because the
1964 // outgoing stack slots may alias the incoming argument stack slots, and
1965 // the alias isn't otherwise explicit. This is slightly more conservative
1966 // than necessary, because it means that each store effectively depends
1967 // on every argument instead of just those arguments it would clobber.
1968
1969 // Do not flag preceding copytoreg stuff together with the following stuff.
1970 InFlag = SDValue();
1971 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1972 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1973 RegsToPass[i].second, InFlag);
1974 InFlag = Chain.getValue(1);
1975 }
1976 InFlag = SDValue();
1977 }
1978
1979 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1980 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1981 // node so that legalize doesn't hack it.
1982 bool isDirect = false;
1983
1984 const TargetMachine &TM = getTargetMachine();
1985 const Module *Mod = MF.getFunction()->getParent();
1986 const GlobalValue *GV = nullptr;
1987 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1988 GV = G->getGlobal();
1989 bool isStub =
1990 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1991
1992 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1993 bool isLocalARMFunc = false;
1994 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1995 auto PtrVt = getPointerTy(DAG.getDataLayout());
1996
1997 if (Subtarget->genLongCalls()) {
1998 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1999 "long-calls codegen is not position independent!");
2000 // Handle a global address or an external symbol. If it's not one of
2001 // those, the target's already in a register, so we don't need to do
2002 // anything extra.
2003 if (isa<GlobalAddressSDNode>(Callee)) {
2004 // Create a constant pool entry for the callee address
2005 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2006 ARMConstantPoolValue *CPV =
2007 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2008
2009 // Get the address of the callee into a register
2010 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2011 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2012 Callee = DAG.getLoad(
2013 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2014 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2015 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2016 const char *Sym = S->getSymbol();
2017
2018 // Create a constant pool entry for the callee address
2019 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2020 ARMConstantPoolValue *CPV =
2021 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2022 ARMPCLabelIndex, 0);
2023 // Get the address of the callee into a register
2024 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2025 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2026 Callee = DAG.getLoad(
2027 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2028 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2029 }
2030 } else if (isa<GlobalAddressSDNode>(Callee)) {
2031 // If we're optimizing for minimum size and the function is called three or
2032 // more times in this block, we can improve codesize by calling indirectly
2033 // as BLXr has a 16-bit encoding.
2034 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2035 auto *BB = CLI.CS->getParent();
2036 bool PreferIndirect =
2037 Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2038 count_if(GV->users(), [&BB](const User *U) {
2039 return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2040 }) > 2;
2041
2042 if (!PreferIndirect) {
2043 isDirect = true;
2044 bool isDef = GV->isStrongDefinitionForLinker();
2045
2046 // ARM call to a local ARM function is predicable.
2047 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2048 // tBX takes a register source operand.
2049 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2050 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2051 Callee = DAG.getNode(
2052 ARMISD::WrapperPIC, dl, PtrVt,
2053 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2054 Callee = DAG.getLoad(
2055 PtrVt, dl, DAG.getEntryNode(), Callee,
2056 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2057 /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2058 MachineMemOperand::MOInvariant);
2059 } else if (Subtarget->isTargetCOFF()) {
2060 assert(Subtarget->isTargetWindows() &&
2061 "Windows is the only supported COFF target");
2062 unsigned TargetFlags = GV->hasDLLImportStorageClass()
2063 ? ARMII::MO_DLLIMPORT
2064 : ARMII::MO_NO_FLAG;
2065 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2066 TargetFlags);
2067 if (GV->hasDLLImportStorageClass())
2068 Callee =
2069 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2070 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2071 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2072 } else {
2073 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2074 }
2075 }
2076 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2077 isDirect = true;
2078 // tBX takes a register source operand.
2079 const char *Sym = S->getSymbol();
2080 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2081 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2082 ARMConstantPoolValue *CPV =
2083 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2084 ARMPCLabelIndex, 4);
2085 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2086 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2087 Callee = DAG.getLoad(
2088 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2089 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2090 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2091 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2092 } else {
2093 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2094 }
2095 }
2096
2097 // FIXME: handle tail calls differently.
2098 unsigned CallOpc;
2099 if (Subtarget->isThumb()) {
2100 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2101 CallOpc = ARMISD::CALL_NOLINK;
2102 else
2103 CallOpc = ARMISD::CALL;
2104 } else {
2105 if (!isDirect && !Subtarget->hasV5TOps())
2106 CallOpc = ARMISD::CALL_NOLINK;
2107 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2108 // Emit regular call when code size is the priority
2109 !MF.getFunction()->optForMinSize())
2110 // "mov lr, pc; b _foo" to avoid confusing the RSP
2111 CallOpc = ARMISD::CALL_NOLINK;
2112 else
2113 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2114 }
2115
2116 std::vector<SDValue> Ops;
2117 Ops.push_back(Chain);
2118 Ops.push_back(Callee);
2119
2120 // Add argument registers to the end of the list so that they are known live
2121 // into the call.
2122 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2123 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2124 RegsToPass[i].second.getValueType()));
2125
2126 // Add a register mask operand representing the call-preserved registers.
2127 if (!isTailCall) {
2128 const uint32_t *Mask;
2129 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2130 if (isThisReturn) {
2131 // For 'this' returns, use the R0-preserving mask if applicable
2132 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2133 if (!Mask) {
2134 // Set isThisReturn to false if the calling convention is not one that
2135 // allows 'returned' to be modeled in this way, so LowerCallResult does
2136 // not try to pass 'this' straight through
2137 isThisReturn = false;
2138 Mask = ARI->getCallPreservedMask(MF, CallConv);
2139 }
2140 } else
2141 Mask = ARI->getCallPreservedMask(MF, CallConv);
2142
2143 assert(Mask && "Missing call preserved mask for calling convention");
2144 Ops.push_back(DAG.getRegisterMask(Mask));
2145 }
2146
2147 if (InFlag.getNode())
2148 Ops.push_back(InFlag);
2149
2150 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2151 if (isTailCall) {
2152 MF.getFrameInfo().setHasTailCall();
2153 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2154 }
2155
2156 // Returns a chain and a flag for retval copy to use.
2157 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2158 InFlag = Chain.getValue(1);
2159
2160 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2161 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2162 if (!Ins.empty())
2163 InFlag = Chain.getValue(1);
2164
2165 // Handle result values, copying them out of physregs into vregs that we
2166 // return.
2167 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2168 InVals, isThisReturn,
2169 isThisReturn ? OutVals[0] : SDValue());
2170}
2171
2172/// HandleByVal - Every parameter *after* a byval parameter is passed
2173/// on the stack. Remember the next parameter register to allocate,
2174 /// and then confiscate the rest of the parameter registers to ensure
2175/// this.
2176void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2177 unsigned Align) const {
2178 // Byval (as with any stack) slots are always at least 4 byte aligned.
2179 Align = std::max(Align, 4U);
2180
2181 unsigned Reg = State->AllocateReg(GPRArgRegs);
2182 if (!Reg)
2183 return;
2184
2185 unsigned AlignInRegs = Align / 4;
2186 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2187 for (unsigned i = 0; i < Waste; ++i)
2188 Reg = State->AllocateReg(GPRArgRegs);
2189
2190 if (!Reg)
2191 return;
2192
2193 unsigned Excess = 4 * (ARM::R4 - Reg);
2194
2195 // Special case when NSAA != SP and the parameter size is greater than the size
2196 // of all remaining GPR regs. In that case we can't split the parameter; we must
2197 // send it to the stack. We must also set the NCRN to R4, wasting all
2198 // remaining registers.
2199 const unsigned NSAAOffset = State->getNextStackOffset();
2200 if (NSAAOffset != 0 && Size > Excess) {
2201 while (State->AllocateReg(GPRArgRegs))
2202 ;
2203 return;
2204 }
2205
2206 // The first register for a byval parameter is the first register that wasn't
2207 // allocated before this method call, so it would be "reg".
2208 // If the parameter is small enough to be saved in the range [reg, r4), then
2209 // the end (first after last) register would be reg + param-size-in-regs;
2210 // otherwise the parameter would be split between registers and stack, and
2211 // the end register would be r4 in this case.
2212 unsigned ByValRegBegin = Reg;
2213 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2214 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2215 // Note, the first register is already allocated at the beginning of the
2216 // function, so allocate only the remaining registers we need.
2217 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2218 State->AllocateReg(GPRArgRegs);
2219 // A byval parameter that is split between registers and memory needs its
2220 // size truncated here.
2221 // In the case where the entire structure fits in registers, we set the
2222 // size in memory to zero.
2223 Size = std::max<int>(Size - Excess, 0);
2224}
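// Illustrative sketch (editorial addition, not part of ARMISelLowering.cpp):
// a worked example of the register/stack split above. With R2 as the first free
// GPR, two registers (R2, R3) remain, so Excess = 8 bytes; a 12-byte byval is
// then split as 8 bytes in registers and 4 bytes on the stack (assuming the
// NSAA == SP case, i.e. no earlier stack arguments force the whole byval out).
static unsigned byvalStackBytes(unsigned SizeInBytes, unsigned FreeGPRs) {
  unsigned Excess = 4 * FreeGPRs;                         // bytes still available in GPRs
  return SizeInBytes > Excess ? SizeInBytes - Excess : 0; // remainder sent to the stack
}
// byvalStackBytes(12, 2) == 4; byvalStackBytes(8, 2) == 0 (fits entirely in regs).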
2225
2226/// MatchingStackOffset - Return true if the given stack call argument is
2227/// already available in the same position (relatively) of the caller's
2228/// incoming argument stack.
2229static
2230bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2231 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2232 const TargetInstrInfo *TII) {
2233 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2234 int FI = std::numeric_limits<int>::max();
2235 if (Arg.getOpcode() == ISD::CopyFromReg) {
2236 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2237 if (!TargetRegisterInfo::isVirtualRegister(VR))
2238 return false;
2239 MachineInstr *Def = MRI->getVRegDef(VR);
2240 if (!Def)
2241 return false;
2242 if (!Flags.isByVal()) {
2243 if (!TII->isLoadFromStackSlot(*Def, FI))
2244 return false;
2245 } else {
2246 return false;
2247 }
2248 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2249 if (Flags.isByVal())
2250 // ByVal argument is passed in as a pointer but it's now being
2251 // dereferenced. e.g.
2252 // define @foo(%struct.X* %A) {
2253 // tail call @bar(%struct.X* byval %A)
2254 // }
2255 return false;
2256 SDValue Ptr = Ld->getBasePtr();
2257 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2258 if (!FINode)
2259 return false;
2260 FI = FINode->getIndex();
2261 } else
2262 return false;
2263
2264 assert(FI != std::numeric_limits<int>::max());
2265 if (!MFI.isFixedObjectIndex(FI))
2266 return false;
2267 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2268}
2269
2270/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2271/// for tail call optimization. Targets which want to do tail call
2272/// optimization should implement this function.
2273bool
2274ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2275 CallingConv::ID CalleeCC,
2276 bool isVarArg,
2277 bool isCalleeStructRet,
2278 bool isCallerStructRet,
2279 const SmallVectorImpl<ISD::OutputArg> &Outs,
2280 const SmallVectorImpl<SDValue> &OutVals,
2281 const SmallVectorImpl<ISD::InputArg> &Ins,
2282 SelectionDAG& DAG) const {
2283 MachineFunction &MF = DAG.getMachineFunction();
2284 const Function *CallerF = MF.getFunction();
2285 CallingConv::ID CallerCC = CallerF->getCallingConv();
2286
2287 assert(Subtarget->supportsTailCall());
2288
2289 // Look for obvious safe cases to perform tail call optimization that do not
2290 // require ABI changes. This is what gcc calls sibcall.
2291
2292 // Exception-handling functions need a special set of instructions to indicate
2293 // a return to the hardware. Tail-calling another function would probably
2294 // break this.
2295 if (CallerF->hasFnAttribute("interrupt"))
2296 return false;
2297
2298 // Also avoid sibcall optimization if either caller or callee uses struct
2299 // return semantics.
2300 if (isCalleeStructRet || isCallerStructRet)
2301 return false;
2302
2303 // Externally-defined functions with weak linkage should not be
2304 // tail-called on ARM when the OS does not support dynamic
2305 // pre-emption of symbols, as the AAELF spec requires normal calls
2306 // to undefined weak functions to be replaced with a NOP or jump to the
2307 // next instruction. The behaviour of branch instructions in this
2308 // situation (as used for tail calls) is implementation-defined, so we
2309 // cannot rely on the linker replacing the tail call with a return.
2310 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2311 const GlobalValue *GV = G->getGlobal();
2312 const Triple &TT = getTargetMachine().getTargetTriple();
2313 if (GV->hasExternalWeakLinkage() &&
2314 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2315 return false;
2316 }
2317
2318 // Check that the call results are passed in the same way.
2319 LLVMContext &C = *DAG.getContext();
2320 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2321 CCAssignFnForReturn(CalleeCC, isVarArg),
2322 CCAssignFnForReturn(CallerCC, isVarArg)))
2323 return false;
2324 // The callee has to preserve all registers the caller needs to preserve.
2325 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2326 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2327 if (CalleeCC != CallerCC) {
2328 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2329 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2330 return false;
2331 }
2332
2333 // If Caller's vararg or byval argument has been split between registers and
2334 // stack, do not perform tail call, since part of the argument is in caller's
2335 // local frame.
2336 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2337 if (AFI_Caller->getArgRegsSaveSize())
2338 return false;
2339
2340 // If the callee takes no arguments then go on to check the results of the
2341 // call.
2342 if (!Outs.empty()) {
2343 // Check if stack adjustment is needed. For now, do not do this if any
2344 // argument is passed on the stack.
2345 SmallVector<CCValAssign, 16> ArgLocs;
2346 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2347 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2348 if (CCInfo.getNextStackOffset()) {
2349 // Check if the arguments are already laid out in the right way as
2350 // the caller's fixed stack objects.
2351 MachineFrameInfo &MFI = MF.getFrameInfo();
2352 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2353 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2354 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2355 i != e;
2356 ++i, ++realArgIdx) {
2357 CCValAssign &VA = ArgLocs[i];
2358 EVT RegVT = VA.getLocVT();
2359 SDValue Arg = OutVals[realArgIdx];
2360 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2361 if (VA.getLocInfo() == CCValAssign::Indirect)
2362 return false;
2363 if (VA.needsCustom()) {
2364 // f64 and vector types are split into multiple registers or
2365 // register/stack-slot combinations. The types will not match
2366 // the registers; give up on memory f64 refs until we figure
2367 // out what to do about this.
2368 if (!VA.isRegLoc())
2369 return false;
2370 if (!ArgLocs[++i].isRegLoc())
2371 return false;
2372 if (RegVT == MVT::v2f64) {
2373 if (!ArgLocs[++i].isRegLoc())
2374 return false;
2375 if (!ArgLocs[++i].isRegLoc())
2376 return false;
2377 }
2378 } else if (!VA.isRegLoc()) {
2379 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2380 MFI, MRI, TII))
2381 return false;
2382 }
2383 }
2384 }
2385
2386 const MachineRegisterInfo &MRI = MF.getRegInfo();
2387 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2388 return false;
2389 }
2390
2391 return true;
2392}
2393
2394bool
2395ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2396 MachineFunction &MF, bool isVarArg,
2397 const SmallVectorImpl<ISD::OutputArg> &Outs,
2398 LLVMContext &Context) const {
2399 SmallVector<CCValAssign, 16> RVLocs;
2400 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2401 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2402}
2403
2404static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2405 const SDLoc &DL, SelectionDAG &DAG) {
2406 const MachineFunction &MF = DAG.getMachineFunction();
2407 const Function *F = MF.getFunction();
2408
2409 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2410
2411 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2412 // version of the "preferred return address". These offsets affect the return
2413 // instruction if this is a return from PL1 without hypervisor extensions.
2414 // IRQ/FIQ: +4 "subs pc, lr, #4"
2415 // SWI: 0 "subs pc, lr, #0"
2416 // ABORT: +4 "subs pc, lr, #4"
2417 // UNDEF: +4/+2 "subs pc, lr, #0"
2418 // UNDEF varies depending on where the exception came from ARM or Thumb
2419 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2420
2421 int64_t LROffset;
2422 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2423 IntKind == "ABORT")
2424 LROffset = 4;
2425 else if (IntKind == "SWI" || IntKind == "UNDEF")
2426 LROffset = 0;
2427 else
2428 report_fatal_error("Unsupported interrupt attribute. If present, value "
2429 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2430
2431 RetOps.insert(RetOps.begin() + 1,
2432 DAG.getConstant(LROffset, DL, MVT::i32, false));
2433
2434 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2435}
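// Illustrative sketch (editorial addition, not part of ARMISelLowering.cpp):
// a standalone mirror of the LR-offset selection above; unknown interrupt kinds
// are rejected with report_fatal_error in the real code.
#include <string>
static long interruptLROffset(const std::string &Kind) {
  if (Kind.empty() || Kind == "IRQ" || Kind == "FIQ" || Kind == "ABORT")
    return 4; // "subs pc, lr, #4"
  return 0;   // "SWI" and "UNDEF": "subs pc, lr, #0"
}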
2436
2437SDValue
2438ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2439 bool isVarArg,
2440 const SmallVectorImpl<ISD::OutputArg> &Outs,
2441 const SmallVectorImpl<SDValue> &OutVals,
2442 const SDLoc &dl, SelectionDAG &DAG) const {
2443
2444 // CCValAssign - represent the assignment of the return value to a location.
2445 SmallVector<CCValAssign, 16> RVLocs;
2446
2447 // CCState - Info about the registers and stack slots.
2448 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2449 *DAG.getContext());
2450
2451 // Analyze outgoing return values.
2452 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2453
2454 SDValue Flag;
2455 SmallVector<SDValue, 4> RetOps;
2456 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2457 bool isLittleEndian = Subtarget->isLittle();
2458
2459 MachineFunction &MF = DAG.getMachineFunction();
2460 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2461 AFI->setReturnRegsCount(RVLocs.size());
2462
2463 // Copy the result values into the output registers.
2464 for (unsigned i = 0, realRVLocIdx = 0;
2465 i != RVLocs.size();
2466 ++i, ++realRVLocIdx) {
2467 CCValAssign &VA = RVLocs[i];
2468 assert(VA.isRegLoc() && "Can only return in registers!");
2469
2470 SDValue Arg = OutVals[realRVLocIdx];
2471
2472 switch (VA.getLocInfo()) {
2473 default: llvm_unreachable("Unknown loc info!");
2474 case CCValAssign::Full: break;
2475 case CCValAssign::BCvt:
2476 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2477 break;
2478 }
2479
2480 if (VA.needsCustom()) {
2481 if (VA.getLocVT() == MVT::v2f64) {
2482 // Extract the first half and return it in two registers.
2483 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2484 DAG.getConstant(0, dl, MVT::i32));
2485 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2486 DAG.getVTList(MVT::i32, MVT::i32), Half);
2487
2488 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2489 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2490 Flag);
2491 Flag = Chain.getValue(1);
2492 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2493 VA = RVLocs[++i]; // skip ahead to next loc
2494 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2495 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2496 Flag);
2497 Flag = Chain.getValue(1);
2498 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2499 VA = RVLocs[++i]; // skip ahead to next loc
2500
2501 // Extract the 2nd half and fall through to handle it as an f64 value.
2502 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2503 DAG.getConstant(1, dl, MVT::i32));
2504 }
2505 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2506 // available.
2507 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2508 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2509 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2510 fmrrd.getValue(isLittleEndian ? 0 : 1),
2511 Flag);
2512 Flag = Chain.getValue(1);
2513 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2514 VA = RVLocs[++i]; // skip ahead to next loc
2515 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2516 fmrrd.getValue(isLittleEndian ? 1 : 0),
2517 Flag);
2518 } else
2519 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2520
2521 // Guarantee that all emitted copies are glued together,
2522 // so that nothing else can be scheduled between them.
2523 Flag = Chain.getValue(1);
2524 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2525 }
2526 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2527 const MCPhysReg *I =
2528 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2529 if (I) {
2530 for (; *I; ++I) {
2531 if (ARM::GPRRegClass.contains(*I))
2532 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2533 else if (ARM::DPRRegClass.contains(*I))
2534 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2535 else
2536 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2537 }
2538 }
2539
2540 // Update chain and glue.
2541 RetOps[0] = Chain;
2542 if (Flag.getNode())
2543 RetOps.push_back(Flag);
2544
2545 // CPUs which aren't M-class use a special sequence to return from
2546 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2547 // though we use "subs pc, lr, #N").
2548 //
2549 // M-class CPUs actually use a normal return sequence with a special
2550 // (hardware-provided) value in LR, so the normal code path works.
2551 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2552 !Subtarget->isMClass()) {
2553 if (Subtarget->isThumb1Only())
2554 report_fatal_error("interrupt attribute is not supported in Thumb1");
2555 return LowerInterruptReturn(RetOps, dl, DAG);
2556 }
2557
2558 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2559}
2560
2561bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2562 if (N->getNumValues() != 1)
2563 return false;
2564 if (!N->hasNUsesOfValue(1, 0))
2565 return false;
2566
2567 SDValue TCChain = Chain;
2568 SDNode *Copy = *N->use_begin();
2569 if (Copy->getOpcode() == ISD::CopyToReg) {
2570 // If the copy has a glue operand, we conservatively assume it isn't safe to
2571 // perform a tail call.
2572 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2573 return false;
2574 TCChain = Copy->getOperand(0);
2575 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2576 SDNode *VMov = Copy;
2577 // f64 returned in a pair of GPRs.
2578 SmallPtrSet<SDNode*, 2> Copies;
2579 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2580 UI != UE; ++UI) {
2581 if (UI->getOpcode() != ISD::CopyToReg)
2582 return false;
2583 Copies.insert(*UI);
2584 }
2585 if (Copies.size() > 2)
2586 return false;
2587
2588 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2589 UI != UE; ++UI) {
2590 SDValue UseChain = UI->getOperand(0);
2591 if (Copies.count(UseChain.getNode()))
2592 // Second CopyToReg
2593 Copy = *UI;
2594 else {
2595 // We are at the top of this chain.
2596 // If the copy has a glue operand, we conservatively assume it
2597 // isn't safe to perform a tail call.
2598 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2599 return false;
2600 // First CopyToReg
2601 TCChain = UseChain;
2602 }
2603 }
2604 } else if (Copy->getOpcode() == ISD::BITCAST) {
2605 // f32 returned in a single GPR.
2606 if (!Copy->hasOneUse())
2607 return false;
2608 Copy = *Copy->use_begin();
2609 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2610 return false;
2611 // If the copy has a glue operand, we conservatively assume it isn't safe to
2612 // perform a tail call.
2613 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2614 return false;
2615 TCChain = Copy->getOperand(0);
2616 } else {
2617 return false;
2618 }
2619
2620 bool HasRet = false;
2621 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2622 UI != UE; ++UI) {
2623 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2624 UI->getOpcode() != ARMISD::INTRET_FLAG)
2625 return false;
2626 HasRet = true;
2627 }
2628
2629 if (!HasRet)
2630 return false;
2631
2632 Chain = TCChain;
2633 return true;
2634}
2635
2636bool ARMTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
2637 if (!Subtarget->supportsTailCall())
2638 return false;
2639
2640 auto Attr =
2641 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2642 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2643 return false;
2644
2645 return true;
2646}
2647
2648 // Writing a 64-bit value requires splitting it into two 32-bit values first,
2649 // and passing the low and high parts through.
2650static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2651 SDLoc DL(Op);
2652 SDValue WriteValue = Op->getOperand(2);
2653
2654 // This function is only supposed to be called for i64 type argument.
2655 assert(WriteValue.getValueType() == MVT::i64
2656 && "LowerWRITE_REGISTER called for non-i64 type argument.");
2657
2658 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2659 DAG.getConstant(0, DL, MVT::i32));
2660 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2661 DAG.getConstant(1, DL, MVT::i32));
2662 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2663 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2664}
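// Editorial sketch (not part of the original ARMISelLowering.cpp): at the value
// level, the two EXTRACT_ELEMENT nodes built above split the 64-bit payload into
// its low and high 32-bit halves, equivalent to the following standalone helper
// (requires <cstdint>):

static void splitWriteValue(uint64_t WriteValue, uint32_t &Lo, uint32_t &Hi) {
  Lo = static_cast<uint32_t>(WriteValue);        // element 0: low 32 bits
  Hi = static_cast<uint32_t>(WriteValue >> 32);  // element 1: high 32 bits
}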
2665
2666// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2667// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2668 // one of the above-mentioned nodes. It has to be wrapped because otherwise
2669 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2670 // be used to form an addressing mode. These wrapped nodes will be selected
2671// into MOVi.
2672static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2673 EVT PtrVT = Op.getValueType();
2674 // FIXME there is no actual debug info here
2675 SDLoc dl(Op);
2676 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2677 SDValue Res;
2678 if (CP->isMachineConstantPoolEntry())
2679 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2680 CP->getAlignment());
2681 else
2682 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2683 CP->getAlignment());
2684 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2685}
2686
2687unsigned ARMTargetLowering::getJumpTableEncoding() const {
2688 return MachineJumpTableInfo::EK_Inline;
2689}
2690
2691SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2692 SelectionDAG &DAG) const {
2693 MachineFunction &MF = DAG.getMachineFunction();
2694 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2695 unsigned ARMPCLabelIndex = 0;
2696 SDLoc DL(Op);
2697 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2698 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2699 SDValue CPAddr;
2700 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2701 if (!IsPositionIndependent) {
2702 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2703 } else {
2704 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2705 ARMPCLabelIndex = AFI->createPICLabelUId();
2706 ARMConstantPoolValue *CPV =
2707 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2708 ARMCP::CPBlockAddress, PCAdj);
2709 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2710 }
2711 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2712 SDValue Result = DAG.getLoad(
2713 PtrVT, DL, DAG.getEntryNode(), CPAddr,
2714 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2715 if (!IsPositionIndependent)
2716 return Result;
2717 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2718 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2719}
2720
2721/// \brief Convert a TLS address reference into the correct sequence of loads
2722/// and calls to compute the variable's address for Darwin, and return an
2723/// SDValue containing the final node.
2724
2725/// Darwin only has one TLS scheme which must be capable of dealing with the
2726/// fully general situation, in the worst case. This means:
2727/// + "extern __thread" declaration.
2728/// + Defined in a possibly unknown dynamic library.
2729///
2730/// The general system is that each __thread variable has a [3 x i32] descriptor
2731/// which contains information used by the runtime to calculate the address. The
2732/// only part of this the compiler needs to know about is the first word, which
2733/// contains a function pointer that must be called with the address of the
2734/// entire descriptor in "r0".
2735///
2736/// Since this descriptor may be in a different unit, in general access must
2737/// proceed along the usual ARM rules. A common sequence to produce is:
2738///
2739/// movw rT1, :lower16:_var$non_lazy_ptr
2740/// movt rT1, :upper16:_var$non_lazy_ptr
2741/// ldr r0, [rT1]
2742/// ldr rT2, [r0]
2743/// blx rT2
2744/// [...address now in r0...]
2745SDValue
2746ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2747 SelectionDAG &DAG) const {
2748 assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
2749 SDLoc DL(Op);
2750
2751 // The first step is to get the address of the actual global symbol. This is
2752 // where the TLS descriptor lives.
2753 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2754
2755 // The first entry in the descriptor is a function pointer that we must call
2756 // to obtain the address of the variable.
2757 SDValue Chain = DAG.getEntryNode();
2758 SDValue FuncTLVGet = DAG.getLoad(
2759 MVT::i32, DL, Chain, DescAddr,
2760 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2761 /* Alignment = */ 4,
2762 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2763 MachineMemOperand::MOInvariant);
2764 Chain = FuncTLVGet.getValue(1);
2765
2766 MachineFunction &F = DAG.getMachineFunction();
2767 MachineFrameInfo &MFI = F.getFrameInfo();
2768 MFI.setAdjustsStack(true);
2769
2770 // TLS calls preserve all registers except those that absolutely must be
2771 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2772 // silly).
2773 auto TRI =
2774 getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2775 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2776 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2777
2778 // Finally, we can make the call. This is just a degenerate version of a
2779 // normal ARM call node: r0 takes the address of the descriptor, and
2780 // returns the address of the variable in this thread.
2781 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2782 Chain =
2783 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2784 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2785 DAG.getRegisterMask(Mask), Chain.getValue(1));
2786 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2787}
2788
2789SDValue
2790ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2791 SelectionDAG &DAG) const {
2792 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2793
2794 SDValue Chain = DAG.getEntryNode();
2795 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2796 SDLoc DL(Op);
2797
2798 // Load the current TEB (thread environment block)
2799 SDValue Ops[] = {Chain,
2800 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2801 DAG.getConstant(15, DL, MVT::i32),
2802 DAG.getConstant(0, DL, MVT::i32),
2803 DAG.getConstant(13, DL, MVT::i32),
2804 DAG.getConstant(0, DL, MVT::i32),
2805 DAG.getConstant(2, DL, MVT::i32)};
2806 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2807 DAG.getVTList(MVT::i32, MVT::Other), Ops);
2808
2809 SDValue TEB = CurrentTEB.getValue(0);
2810 Chain = CurrentTEB.getValue(1);
2811
2812 // Load the ThreadLocalStoragePointer from the TEB
2813 // A pointer to the TLS array is located at offset 0x2c from the TEB.
2814 SDValue TLSArray =
2815 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2816 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2817
2818 // The pointer to the thread's TLS data area is at the TLS Index scaled by 4
2819 // offset into the TLSArray.
2820
2821 // Load the TLS index from the C runtime
2822 SDValue TLSIndex =
2823 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2824 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2825 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2826
2827 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2828 DAG.getConstant(2, DL, MVT::i32));
2829 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2830 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2831 MachinePointerInfo());
2832
2833 // Get the offset of the start of the .tls section (section base)
2834 const auto *GA = cast<GlobalAddressSDNode>(Op);
2835 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2836 SDValue Offset = DAG.getLoad(
2837 PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2838 DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2839 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2840
2841 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2842}
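// Editorial sketch (not part of the original file): stripped of SelectionDAG
// mechanics, the Windows-on-ARM TLS access built above computes the variable's
// address roughly as follows. TEB is the thread environment block pointer read
// via the mrc intrinsic, TlsIndex is the value loaded from _tls_index, and
// SecRelOffset is the section-relative offset loaded from the constant pool;
// the function and parameter names are illustrative placeholders only.

static char *windowsTLSAddress(char *TEB, unsigned TlsIndex,
                               unsigned SecRelOffset) {
  // ThreadLocalStoragePointer lives at offset 0x2c in the TEB.
  char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x2c);
  // Indexing a char** scales TlsIndex by the pointer size (4 on 32-bit ARM),
  // matching the "shl by 2" in the lowering above.
  char *ThreadData = TLSArray[TlsIndex];
  return ThreadData + SecRelOffset;
}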
2843
2844// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2845SDValue
2846ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2847 SelectionDAG &DAG) const {
2848 SDLoc dl(GA);
2849 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2850 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2851 MachineFunction &MF = DAG.getMachineFunction();
2852 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2853 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2854 ARMConstantPoolValue *CPV =
2855 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2856 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2857 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2858 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2859 Argument = DAG.getLoad(
2860 PtrVT, dl, DAG.getEntryNode(), Argument,
2861 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2862 SDValue Chain = Argument.getValue(1);
2863
2864 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2865 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2866
2867 // call __tls_get_addr.
2868 ArgListTy Args;
2869 ArgListEntry Entry;
2870 Entry.Node = Argument;
2871 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2872 Args.push_back(Entry);
2873
2874 // FIXME: is there useful debug info available here?
2875 TargetLowering::CallLoweringInfo CLI(DAG);
2876 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2877 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2878 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2879
2880 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2881 return CallResult.first;
2882}
2883
2884// Lower ISD::GlobalTLSAddress using the "initial exec" or
2885// "local exec" model.
2886SDValue
2887ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2888 SelectionDAG &DAG,
2889 TLSModel::Model model) const {
2890 const GlobalValue *GV = GA->getGlobal();
2891 SDLoc dl(GA);
2892 SDValue Offset;
2893 SDValue Chain = DAG.getEntryNode();
2894 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2895 // Get the Thread Pointer
2896 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2897
2898 if (model == TLSModel::InitialExec) {
2899 MachineFunction &MF = DAG.getMachineFunction();
2900 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2901 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2902 // Initial exec model.
2903 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2904 ARMConstantPoolValue *CPV =
2905 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2906 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2907 true);
2908 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2909 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2910 Offset = DAG.getLoad(
2911 PtrVT, dl, Chain, Offset,
2912 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2913 Chain = Offset.getValue(1);
2914
2915 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2916 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2917
2918 Offset = DAG.getLoad(
2919 PtrVT, dl, Chain, Offset,
2920 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2921 } else {
2922 // local exec model
2923 assert(model == TLSModel::LocalExec);
2924 ARMConstantPoolValue *CPV =
2925 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2926 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2927 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2928 Offset = DAG.getLoad(
2929 PtrVT, dl, Chain, Offset,
2930 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2931 }
2932
2933 // The address of the thread local variable is the add of the thread
2934 // pointer with the offset of the variable.
2935 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2936}
2937
2938SDValue
2939ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2940 if (Subtarget->isTargetDarwin())
2941 return LowerGlobalTLSAddressDarwin(Op, DAG);
2942
2943 if (Subtarget->isTargetWindows())
2944 return LowerGlobalTLSAddressWindows(Op, DAG);
2945
2946 // TODO: implement the "local dynamic" model
2947 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2948 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2949 if (DAG.getTarget().Options.EmulatedTLS)
2950 return LowerToTLSEmulatedModel(GA, DAG);
2951
2952 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2953
2954 switch (model) {
2955 case TLSModel::GeneralDynamic:
2956 case TLSModel::LocalDynamic:
2957 return LowerToTLSGeneralDynamicModel(GA, DAG);
2958 case TLSModel::InitialExec:
2959 case TLSModel::LocalExec:
2960 return LowerToTLSExecModels(GA, DAG, model);
2961 }
2962 llvm_unreachable("bogus TLS model");
2963}
2964
2965/// Return true if all users of V are within function F, looking through
2966/// ConstantExprs.
2967static bool allUsersAreInFunction(const Value *V, const Function *F) {
2968 SmallVector<const User*,4> Worklist;
2969 for (auto *U : V->users())
2970 Worklist.push_back(U);
2971 while (!Worklist.empty()) {
2972 auto *U = Worklist.pop_back_val();
2973 if (isa<ConstantExpr>(U)) {
2974 for (auto *UU : U->users())
2975 Worklist.push_back(UU);
2976 continue;
2977 }
2978
2979 auto *I = dyn_cast<Instruction>(U);
2980 if (!I || I->getParent()->getParent() != F)
2981 return false;
2982 }
2983 return true;
2984}
2985
2986/// Return true if all users of V are within some (any) function, looking through
2987 /// ConstantExprs. In other words, return false if V has any global constant users.
2988static bool allUsersAreInFunctions(const Value *V) {
2989 SmallVector<const User*,4> Worklist;
2990 for (auto *U : V->users())
2991 Worklist.push_back(U);
2992 while (!Worklist.empty()) {
2993 auto *U = Worklist.pop_back_val();
2994 if (isa<ConstantExpr>(U)) {
2995 for (auto *UU : U->users())
2996 Worklist.push_back(UU);
2997 continue;
2998 }
2999
3000 if (!isa<Instruction>(U))
3001 return false;
3002 }
3003 return true;
3004}
3005
3006// Return true if T is an integer, float or an array/vector of either.
3007static bool isSimpleType(Type *T) {
3008 if (T->isIntegerTy() || T->isFloatingPointTy())
3009 return true;
3010 Type *SubT = nullptr;
3011 if (T->isArrayTy())
3012 SubT = T->getArrayElementType();
3013 else if (T->isVectorTy())
3014 SubT = T->getVectorElementType();
3015 else
3016 return false;
3017 return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3018}
3019
3020static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3021 EVT PtrVT, const SDLoc &dl) {
3022 // If we're creating a pool entry for a constant global with unnamed address,
3023 // and the global is small enough, we can emit it inline into the constant pool
3024 // to save ourselves an indirection.
3025 //
3026 // This is a win if the constant is only used in one function (so it doesn't
3027 // need to be duplicated) or duplicating the constant wouldn't increase code
3028 // size (implying the constant is no larger than 4 bytes).
3029 const Function *F = DAG.getMachineFunction().getFunction();
3030
3031 // We rely on this decision to inline being idempotent and unrelated to the
3032 // use-site. We know that if we inline a variable at one use site, we'll
3033 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3034 // doesn't know about this optimization, so bail out if it's enabled; otherwise
3035 // we could decide to inline here (and thus never emit the GV) while fast-isel
3036 // generated code still requires the GV.
3037 if (!EnableConstpoolPromotion ||
3038 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3039 return SDValue();
3040
3041 auto *GVar = dyn_cast<GlobalVariable>(GV);
3042 if (!GVar || !GVar->hasInitializer() ||
3043 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3044 !GVar->hasLocalLinkage())
3045 return SDValue();
3046
3047 // Ensure that we don't try to inline any type that contains pointers. If
3048 // we inline a value that contains relocations, we move the relocations from
3049 // .data to .text which is not ideal.
3050 auto *Init = GVar->getInitializer();
3051 if (!isSimpleType(Init->getType()))
3052 return SDValue();
3053
3054 // The constant islands pass can only really deal with alignment requests
3055 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3056 // any type that requires alignment greater than 4 bytes. We also can only
3057 // promote constants that are multiples of 4 bytes in size or are paddable
3058 // to a multiple of 4. Currently we only try to pad constants that are
3059 // strings, for simplicity.
3060 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3061 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3062 unsigned Align = GVar->getAlignment();
3063 unsigned RequiredPadding = 4 - (Size % 4);
3064 bool PaddingPossible =
3065 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3066 if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize ||
3067 Size == 0)
3068 return SDValue();
3069
3070 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3071 MachineFunction &MF = DAG.getMachineFunction();
3072 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3073
3074 // We can't bloat the constant pool too much, else the ConstantIslands pass
3075 // may fail to converge. If we haven't promoted this global yet (it may have
3076 // multiple uses), and promoting it would increase the constant pool size (Sz
3077 // > 4), ensure we have space to do so up to MaxTotal.
3078 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3079 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3080 ConstpoolPromotionMaxTotal)
3081 return SDValue();
3082
3083 // This is only valid if all users are in a single function OR it has users
3084 // in multiple functions but is no larger than a pointer. We also check if
3085 // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3086 // address taken.
3087 if (!allUsersAreInFunction(GVar, F) &&
3088 !(Size <= 4 && allUsersAreInFunctions(GVar)))
3089 return SDValue();
3090
3091 // We're going to inline this global. Pad it out if needed.
3092 if (RequiredPadding != 4) {
3093 StringRef S = CDAInit->getAsString();
3094
3095 SmallVector<uint8_t,16> V(S.size());
3096 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3097 while (RequiredPadding--)
3098 V.push_back(0);
3099 Init = ConstantDataArray::get(*DAG.getContext(), V);
3100 }
3101
3102 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3103 SDValue CPAddr =
3104 DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3105 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3106 AFI->markGlobalAsPromotedToConstantPool(GVar);
3107 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3108 PaddedSize - 4);
3109 }
3110 ++NumConstpoolPromoted;
3111 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3112}
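// Editorial illustration (not part of the original file): the padding logic
// above, factored into a standalone helper with two worked examples.

static unsigned paddedConstantSize(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4); // 4 means "already a multiple of 4"
  return Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
}
// paddedConstantSize(6) == 8: a 6-byte string gets two zero bytes appended.
// paddedConstantSize(8) == 8: already a multiple of 4, so no padding is added.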
3113
3114static bool isReadOnly(const GlobalValue *GV) {
3115 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3116 GV = GA->getBaseObject();
3117 return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3118 isa<Function>(GV);
3119}
3120
3121SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3122 SelectionDAG &DAG) const {
3123 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3124 SDLoc dl(Op);
3125 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3126 const TargetMachine &TM = getTargetMachine();
3127 bool IsRO = isReadOnly(GV);
3128
3129 // promoteToConstantPool only if not generating XO text section
3130 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3131 if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3132 return V;
3133
3134 if (isPositionIndependent()) {
3135 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3136
3137 MachineFunction &MF = DAG.getMachineFunction();
3138 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3139 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3140 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3141 SDLoc dl(Op);
3142 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3143 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
3144 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
3145 UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3146 /*AddCurrentAddress=*/UseGOT_PREL);
3147 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3148 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3149 SDValue Result = DAG.getLoad(
3150 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3151 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3152 SDValue Chain = Result.getValue(1);
3153 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3154 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3155 if (UseGOT_PREL)
3156 Result =
3157 DAG.getLoad(PtrVT, dl, Chain, Result,
3158 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3159 return Result;
3160 } else if (Subtarget->isROPI() && IsRO) {
3161 // PC-relative.
3162 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3163 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3164 return Result;
3165 } else if (Subtarget->isRWPI() && !IsRO) {
3166 // SB-relative.
3167 SDValue RelAddr;
3168 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3169 ++NumMovwMovt;
3170 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3171 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3172 } else { // use literal pool for address constant
3173 ARMConstantPoolValue *CPV =
3174 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3175 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3176 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3177 RelAddr = DAG.getLoad(
3178 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3179 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3180 }
3181 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3182 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3183 return Result;
3184 }
3185
3186 // If we have T2 ops, we can materialize the address directly via movt/movw
3187 // pair. This is always cheaper.
3188 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3189 ++NumMovwMovt;
3190 // FIXME: Once remat is capable of dealing with instructions with register
3191 // operands, expand this into two nodes.
3192 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3193 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3194 } else {
3195 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3196 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3197 return DAG.getLoad(
3198 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3199 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3200 }
3201}
3202
3203SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3204 SelectionDAG &DAG) const {
3205 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3206 "ROPI/RWPI not currently supported for Darwin");
3207 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3208 SDLoc dl(Op);
3209 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3210
3211 if (Subtarget->useMovt(DAG.getMachineFunction()))
3212 ++NumMovwMovt;
3213
3214 // FIXME: Once remat is capable of dealing with instructions with register
3215 // operands, expand this into multiple nodes
3216 unsigned Wrapper =
3217 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3218
3219 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3220 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3221
3222 if (Subtarget->isGVIndirectSymbol(GV))
3223 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3224 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3225 return Result;
3226}
3227
3228SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3229 SelectionDAG &DAG) const {
3230 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3231 assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3232 "Windows on ARM expects to use movw/movt");
3233 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3234 "ROPI/RWPI not currently supported for Windows");
3235
3236 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3237 const ARMII::TOF TargetFlags =
3238 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3239 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3240 SDValue Result;
3241 SDLoc DL(Op);
3242
3243 ++NumMovwMovt;
3244
3245 // FIXME: Once remat is capable of dealing with instructions with register
3246 // operands, expand this into two nodes.
3247 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3248 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3249 TargetFlags));
3250 if (GV->hasDLLImportStorageClass())
3251 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3252 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3253 return Result;
3254}
3255
3256SDValue
3257ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3258 SDLoc dl(Op);
3259 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3260 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3261 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3262 Op.getOperand(1), Val);
3263}
3264
3265SDValue
3266ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3267 SDLoc dl(Op);
3268 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3269 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3270}
3271
3272SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3273 SelectionDAG &DAG) const {
3274 SDLoc dl(Op);
3275 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3276 Op.getOperand(0));
3277}
3278
3279SDValue
3280ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3281 const ARMSubtarget *Subtarget) const {
3282 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3283 SDLoc dl(Op);
3284 switch (IntNo) {
3285 default: return SDValue(); // Don't custom lower most intrinsics.
3286 case Intrinsic::thread_pointer: {
3287 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3288 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3289 }
3290 case Intrinsic::eh_sjlj_lsda: {
3291 MachineFunction &MF = DAG.getMachineFunction();
3292 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3293 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3294 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3295 SDValue CPAddr;
3296 bool IsPositionIndependent = isPositionIndependent();
3297 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3298 ARMConstantPoolValue *CPV =
3299 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3300 ARMCP::CPLSDA, PCAdj);
3301 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3302 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3303 SDValue Result = DAG.getLoad(
3304 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3305 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3306
3307 if (IsPositionIndependent) {
3308 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3309 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3310 }
3311 return Result;
3312 }
3313 case Intrinsic::arm_neon_vmulls:
3314 case Intrinsic::arm_neon_vmullu: {
3315 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3316 ? ARMISD::VMULLs : ARMISD::VMULLu;
3317 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3318 Op.getOperand(1), Op.getOperand(2));
3319 }
3320 case Intrinsic::arm_neon_vminnm:
3321 case Intrinsic::arm_neon_vmaxnm: {
3322 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3323 ? ISD::FMINNUM : ISD::FMAXNUM;
3324 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3325 Op.getOperand(1), Op.getOperand(2));
3326 }
3327 case Intrinsic::arm_neon_vminu:
3328 case Intrinsic::arm_neon_vmaxu: {
3329 if (Op.getValueType().isFloatingPoint())
3330 return SDValue();
3331 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3332 ? ISD::UMIN : ISD::UMAX;
3333 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3334 Op.getOperand(1), Op.getOperand(2));
3335 }
3336 case Intrinsic::arm_neon_vmins:
3337 case Intrinsic::arm_neon_vmaxs: {
3338 // v{min,max}s is overloaded between signed integers and floats.
3339 if (!Op.getValueType().isFloatingPoint()) {
3340 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3341 ? ISD::SMIN : ISD::SMAX;
3342 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3343 Op.getOperand(1), Op.getOperand(2));
3344 }
3345 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3346 ? ISD::FMINNAN : ISD::FMAXNAN;
3347 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3348 Op.getOperand(1), Op.getOperand(2));
3349 }
3350 case Intrinsic::arm_neon_vtbl1:
3351 return DAG.getNode(ARMISD::VTBL1, SDLoc(Op), Op.getValueType(),
3352 Op.getOperand(1), Op.getOperand(2));
3353 case Intrinsic::arm_neon_vtbl2:
3354 return DAG.getNode(ARMISD::VTBL2, SDLoc(Op), Op.getValueType(),
3355 Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
3356 }
3357}
3358
3359static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3360 const ARMSubtarget *Subtarget) {
3361 SDLoc dl(Op);
3362 ConstantSDNode *ScopeN = cast<ConstantSDNode>(Op.getOperand(2));
3363 auto Scope = static_cast<SynchronizationScope>(ScopeN->getZExtValue());
3364 if (Scope == SynchronizationScope::SingleThread)
3365 return Op;
3366
3367 if (!Subtarget->hasDataBarrier()) {
3368 // Some ARMv6 cpus can support data barriers with an mcr instruction.
3369 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3370 // here.
3371 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3372 "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3373 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3374 DAG.getConstant(0, dl, MVT::i32));
3375 }
3376
3377 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3378 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3379 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3380 if (Subtarget->isMClass()) {
3381 // Only a full system barrier exists in the M-class architectures.
3382 Domain = ARM_MB::SY;
3383 } else if (Subtarget->preferISHSTBarriers() &&
3384 Ord == AtomicOrdering::Release) {
3385 // Swift happens to implement ISHST barriers in a way that's compatible with
3386 // Release semantics but weaker than ISH so we'd be fools not to use
3387 // it. Beware: other processors probably don't!
3388 Domain = ARM_MB::ISHST;
3389 }
3390
3391 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3392 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3393 DAG.getConstant(Domain, dl, MVT::i32));
3394}
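// Editorial sketch (not part of the original file): the barrier-domain choice
// made above, expressed as a standalone helper. The booleans mirror the
// subtarget queries used in the code, and the ARM_MB values are the ones passed
// to the arm_dmb intrinsic; the helper name is introduced for illustration only.

static ARM_MB::MemBOpt selectFenceDomain(bool IsMClass, bool PrefersISHST,
                                         bool IsReleaseOrdering) {
  if (IsMClass)
    return ARM_MB::SY;          // only a full-system barrier exists on M-class
  if (PrefersISHST && IsReleaseOrdering)
    return ARM_MB::ISHST;       // e.g. Swift: ISHST suffices for release fences
  return ARM_MB::ISH;
}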
3395
3396static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3397 const ARMSubtarget *Subtarget) {
3398 // ARM pre-v5TE and Thumb1 do not have preload instructions.
3399 if (!(Subtarget->isThumb2() ||
3400 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3401 // Just preserve the chain.
3402 return Op.getOperand(0);
3403
3404 SDLoc dl(Op);
3405 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3406 if (!isRead &&
3407 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3408 // ARMv7 with MP extension has PLDW.
3409 return Op.getOperand(0);
3410
3411 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3412 if (Subtarget->isThumb()) {
3413 // Invert the bits.
3414 isRead = ~isRead & 1;
3415 isData = ~isData & 1;
3416 }
3417
3418 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3419 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3420 DAG.getConstant(isData, dl, MVT::i32));
3421}
3422
3423static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3424 MachineFunction &MF = DAG.getMachineFunction();
3425 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3426
3427 // vastart just stores the address of the VarArgsFrameIndex slot into the
3428 // memory location argument.
3429 SDLoc dl(Op);
3430 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3431 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3432 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3433 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3434 MachinePointerInfo(SV));
3435}
3436
3437SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3438 CCValAssign &NextVA,
3439 SDValue &Root,
3440 SelectionDAG &DAG,
3441 const SDLoc &dl) const {
3442 MachineFunction &MF = DAG.getMachineFunction();
3443 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3444
3445 const TargetRegisterClass *RC;
3446 if (AFI->isThumb1OnlyFunction())
3447 RC = &ARM::tGPRRegClass;
3448 else
3449 RC = &ARM::GPRRegClass;
3450
3451 // Transform the arguments stored in physical registers into virtual ones.
3452 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3453 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3454
3455 SDValue ArgValue2;
3456 if (NextVA.isMemLoc()) {
3457 MachineFrameInfo &MFI = MF.getFrameInfo();
3458 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3459
3460 // Create load node to retrieve arguments from the stack.
3461 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3462 ArgValue2 = DAG.getLoad(
3463 MVT::i32, dl, Root, FIN,
3464 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3465 } else {
3466 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3467 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3468 }
3469 if (!Subtarget->isLittle())
3470 std::swap (ArgValue, ArgValue2);
3471 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3472}
3473
3474// The remaining GPRs hold either the beginning of variable-argument
3475// data, or the beginning of an aggregate passed by value (usually
3476// byval). Either way, we allocate stack slots adjacent to the data
3477// provided by our caller, and store the unallocated registers there.
3478// If this is a variadic function, the va_list pointer will begin with
3479// these values; otherwise, this reassembles a (byval) structure that
3480// was split between registers and memory.
3481// Return: The frame index registers were stored into.
3482int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3483 const SDLoc &dl, SDValue &Chain,
3484 const Value *OrigArg,
3485 unsigned InRegsParamRecordIdx,
3486 int ArgOffset, unsigned ArgSize) const {
3487 // Currently, two use-cases are possible:
3488 // Case #1. Non-vararg function, and we meet the first byval parameter.
3489 // Set up the first unallocated register as the first byval register;
3490 // eat all remaining registers
3491 // (these two actions are performed by the HandleByVal method).
3492 // Then, here, we initialize the stack frame with
3493 // "store-reg" instructions.
3494 // Case #2. Vararg function that doesn't contain byval parameters.
3495 // The same: eat all remaining unallocated registers and
3496 // initialize the stack frame.
3497
3498 MachineFunction &MF = DAG.getMachineFunction();
3499 MachineFrameInfo &MFI = MF.getFrameInfo();
3500 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3501 unsigned RBegin, REnd;
3502 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3503 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3504 } else {
3505 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3506 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3507 REnd = ARM::R4;
3508 }
3509
3510 if (REnd != RBegin)
3511 ArgOffset = -4 * (ARM::R4 - RBegin);
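// Worked example (editorial, not in the original source): if the byval data
// starts in r2, then RBegin == ARM::R2, REnd == ARM::R4, and ArgOffset becomes
// -4 * 2 == -8, so r2 and r3 are stored at [CFA-8] and [CFA-4], directly below
// the stack data provided by the caller.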
3512
3513 auto PtrVT = getPointerTy(DAG.getDataLayout());
3514 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3515 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3516
3517 SmallVector<SDValue, 4> MemOps;
3518 const TargetRegisterClass *RC =
3519 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3520
3521 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3522 unsigned VReg = MF.addLiveIn(Reg, RC);
3523 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3524 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3525 MachinePointerInfo(OrigArg, 4 * i));
3526 MemOps.push_back(Store);
3527 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3528 }
3529
3530 if (!MemOps.empty())
3531 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3532 return FrameIndex;
3533}
3534
3535 // Set up the stack frame that the va_list pointer will start from.
3536void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3537 const SDLoc &dl, SDValue &Chain,
3538 unsigned ArgOffset,
3539 unsigned TotalArgRegsSaveSize,
3540 bool ForceMutable) const {
3541 MachineFunction &MF = DAG.getMachineFunction();
3542 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3543
3544 // Try to store any remaining integer argument regs
3545 // to their spots on the stack so that they may be loaded by dereferencing
3546 // the result of va_next.
3547 // If there are no regs to be stored, just point the address after the last
3548 // argument passed via the stack.
3549 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3550 CCInfo.getInRegsParamsCount(),
3551 CCInfo.getNextStackOffset(), 4);
3552 AFI->setVarArgsFrameIndex(FrameIndex);
3553}
3554
3555SDValue ARMTargetLowering::LowerFormalArguments(
3556 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3557 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3558 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3559 MachineFunction &MF = DAG.getMachineFunction();
3560 MachineFrameInfo &MFI = MF.getFrameInfo();
3561
3562 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3563
3564 // Assign locations to all of the incoming arguments.
3565 SmallVector<CCValAssign, 16> ArgLocs;
3566 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3567 *DAG.getContext());
3568 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3569
3570 SmallVector<SDValue, 16> ArgValues;
3571 SDValue ArgValue;
3572 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3573 unsigned CurArgIdx = 0;
3574
3575 // Initially ArgRegsSaveSize is zero.
3576 // Then we increase this value each time we meet a byval parameter.
3577 // We also increase this value in the case of a varargs function.
3578 AFI->setArgRegsSaveSize(0);
3579
3580 // Calculate the amount of stack space that we need to allocate to store
3581 // byval and variadic arguments that are passed in registers.
3582 // We need to know this before we allocate the first byval or variadic
3583 // argument, as they will be allocated a stack slot below the CFA (Canonical
3584 // Frame Address, the stack pointer at entry to the function).
3585 unsigned ArgRegBegin = ARM::R4;
3586 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3587 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3588 break;
3589
3590 CCValAssign &VA = ArgLocs[i];
3591 unsigned Index = VA.getValNo();
3592 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3593 if (!Flags.isByVal())
3594 continue;
3595
3596 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3597 unsigned RBegin, REnd;
3598 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3599 ArgRegBegin = std::min(ArgRegBegin, RBegin);
3600
3601 CCInfo.nextInRegsParam();
3602 }
3603 CCInfo.rewindByValRegsInfo();
3604
3605 int lastInsIndex = -1;
3606 if (isVarArg && MFI.hasVAStart()) {
3607 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3608 if (RegIdx != array_lengthof(GPRArgRegs))
3609 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3610 }
3611
3612 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3613 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3614 auto PtrVT = getPointerTy(DAG.getDataLayout());
3615
3616 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3617 CCValAssign &VA = ArgLocs[i];
3618 if (Ins[VA.getValNo()].isOrigArg()) {
3619 std::advance(CurOrigArg,
3620 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3621 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3622 }
3623 // Arguments stored in registers.
3624 if (VA.isRegLoc()) {
3625 EVT RegVT = VA.getLocVT();
3626
3627 if (VA.needsCustom()) {
3628 // f64 and vector types are split up into multiple registers or
3629 // combinations of registers and stack slots.
3630 if (VA.getLocVT() == MVT::v2f64) {
3631 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3632 Chain, DAG, dl);
3633 VA = ArgLocs[++i]; // skip ahead to next loc
3634 SDValue ArgValue2;
3635 if (VA.isMemLoc()) {
3636 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3637 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3638 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3639 MachinePointerInfo::getFixedStack(
3640 DAG.getMachineFunction(), FI));
3641 } else {
3642 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3643 Chain, DAG, dl);
3644 }
3645 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3646 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3647 ArgValue, ArgValue1,
3648 DAG.getIntPtrConstant(0, dl));
3649 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3650 ArgValue, ArgValue2,
3651 DAG.getIntPtrConstant(1, dl));
3652 } else
3653 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3654
3655 } else {
3656 const TargetRegisterClass *RC;
3657
3658 if (RegVT == MVT::f32)
3659 RC = &ARM::SPRRegClass;
3660 else if (RegVT == MVT::f64)
3661 RC = &ARM::DPRRegClass;
3662 else if (RegVT == MVT::v2f64)
3663 RC = &ARM::QPRRegClass;
3664 else if (RegVT == MVT::i32)
3665 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3666 : &ARM::GPRRegClass;
3667 else
3668 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3669
3670 // Transform the arguments in physical registers into virtual ones.
3671 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3672 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3673 }
3674
3675 // If this is an 8 or 16-bit value, it is really passed promoted
3676 // to 32 bits. Insert an assert[sz]ext to capture this, then
3677 // truncate to the right size.
3678 switch (VA.getLocInfo()) {
3679 default: llvm_unreachable("Unknown loc info!");
3680 case CCValAssign::Full: break;
3681 case CCValAssign::BCvt:
3682 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3683 break;
3684 case CCValAssign::SExt:
3685 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3686 DAG.getValueType(VA.getValVT()));
3687 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3688 break;
3689 case CCValAssign::ZExt:
3690 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3691 DAG.getValueType(VA.getValVT()));
3692 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3693 break;
3694 }
3695
3696 InVals.push_back(ArgValue);
3697
3698 } else { // VA.isRegLoc()
3699 // sanity check
3700 assert(VA.isMemLoc());
3701 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3702
3703 int index = VA.getValNo();
3704
3705 // Some Ins[] entries become multiple ArgLoc[] entries.
3706 // Process them only once.
3707 if (index != lastInsIndex)
3708 {
3709 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3710 // FIXME: For now, all byval parameter objects are marked mutable.
3711 // This can be changed with more analysis.
3712 // In case of tail call optimization, mark all arguments mutable,
3713 // since they could be overwritten by the lowering of arguments in case of
3714 // a tail call.
3715 if (Flags.isByVal()) {
3716 assert(Ins[index].isOrigArg() &&
3717 "Byval arguments cannot be implicit");
3718 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3719
3720 int FrameIndex = StoreByValRegs(
3721 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3722 VA.getLocMemOffset(), Flags.getByValSize());
3723 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3724 CCInfo.nextInRegsParam();
3725 } else {
3726 unsigned FIOffset = VA.getLocMemOffset();
3727 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3728 FIOffset, true);
3729
3730 // Create load nodes to retrieve arguments from the stack.
3731 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3732 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3733 MachinePointerInfo::getFixedStack(
3734 DAG.getMachineFunction(), FI)));
3735 }
3736 lastInsIndex = index;
3737 }
3738 }
3739 }
3740
3741 // varargs
3742 if (isVarArg && MFI.hasVAStart())
3743 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3744 CCInfo.getNextStackOffset(),
3745 TotalArgRegsSaveSize);
3746
3747 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3748
3749 return Chain;
3750}
3751
3752/// isFloatingPointZero - Return true if this is +0.0.
3753static bool isFloatingPointZero(SDValue Op) {
3754 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3755 return CFP->getValueAPF().isPosZero();
3756 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3757 // Maybe this has already been legalized into the constant pool?
3758 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3759 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3760 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3761 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3762 return CFP->getValueAPF().isPosZero();
3763 }
3764 } else if (Op->getOpcode() == ISD::BITCAST &&
3765 Op->getValueType(0) == MVT::f64) {
3766 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3767 // created by LowerConstantFP().
3768 SDValue BitcastOp = Op->getOperand(0);
3769 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3770 isNullConstant(BitcastOp->getOperand(0)))
3771 return true;
3772 }
3773 return false;
3774}
3775
3776/// Returns an appropriate ARM CMP (cmp) and the corresponding condition code for
3777/// the given operands.
3778SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3779 SDValue &ARMcc, SelectionDAG &DAG,
3780 const SDLoc &dl) const {
3781 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3782 unsigned C = RHSC->getZExtValue();
3783 if (!isLegalICmpImmediate(C)) {
3784 // Constant does not fit, try adjusting it by one?
3785 switch (CC) {
3786 default: break;
3787 case ISD::SETLT:
3788 case ISD::SETGE:
3789 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3790 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3791 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3792 }
3793 break;
3794 case ISD::SETULT:
3795 case ISD::SETUGE:
3796 if (C != 0 && isLegalICmpImmediate(C-1)) {
3797 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3798 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3799 }
3800 break;
3801 case ISD::SETLE:
3802 case ISD::SETGT:
3803 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3804 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3805 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3806 }
3807 break;
3808 case ISD::SETULE:
3809 case ISD::SETUGT:
3810 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3811 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3812 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3813 }
3814 break;
3815 }
3816 }
3817 }
3818
3819 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3820 ARMISD::NodeType CompareType;
3821 switch (CondCode) {
3822 default:
3823 CompareType = ARMISD::CMP;
3824 break;
3825 case ARMCC::EQ:
3826 case ARMCC::NE:
3827 // Uses only Z Flag
3828 CompareType = ARMISD::CMPZ;
3829 break;
3830 }
3831 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3832 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3833}
3834
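// A small host-side sketch of the immediate adjustment performed above, for
// illustration only: when the right-hand constant C does not encode as an ARM
// compare immediate, a strict comparison can be rewritten against C - 1 (or a
// non-strict one against C + 1), provided the adjusted constant does not wrap.
// isLegalImm is an assumed stand-in for isLegalICmpImmediate.
static bool tryAdjustLessThan(unsigned &C, bool (*isLegalImm)(unsigned)) {
  // (x < C) == (x <= C - 1) for signed x, as long as C != INT_MIN.
  if (C != 0x80000000u && isLegalImm(C - 1)) {
    --C;
    return true; // the caller switches SETLT to SETLE
  }
  return false;
}
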
3835/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3836SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3837 SelectionDAG &DAG, const SDLoc &dl,
3838 bool InvalidOnQNaN) const {
3839 assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3840 SDValue Cmp;
3841 SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3842 if (!isFloatingPointZero(RHS))
3843 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3844 else
3845 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3846 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3847}
3848
3849/// duplicateCmp - Glue values can have only one use, so this function
3850/// duplicates a comparison node.
3851SDValue
3852ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3853 unsigned Opc = Cmp.getOpcode();
3854 SDLoc DL(Cmp);
3855 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3856 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3857
3858 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3859 Cmp = Cmp.getOperand(0);
3860 Opc = Cmp.getOpcode();
3861 if (Opc == ARMISD::CMPFP)
3862 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3863 Cmp.getOperand(1), Cmp.getOperand(2));
3864 else {
3865 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3866 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3867 Cmp.getOperand(1));
3868 }
3869 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3870}
3871
3872std::pair<SDValue, SDValue>
3873ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3874 SDValue &ARMcc) const {
3875 assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3876
3877 SDValue Value, OverflowCmp;
3878 SDValue LHS = Op.getOperand(0);
3879 SDValue RHS = Op.getOperand(1);
3880 SDLoc dl(Op);
3881
3882 // FIXME: We are currently always generating CMPs because we don't support
3883 // generating CMN through the backend. This is not as good as the natural
3884 // CMP case because it causes a register dependency and cannot be folded
3885 // later.
3886
3887 switch (Op.getOpcode()) {
3888 default:
3889 llvm_unreachable("Unknown overflow instruction!");
3890 case ISD::SADDO:
3891 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3892 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3893 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3894 break;
3895 case ISD::UADDO:
3896 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3897 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3898 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3899 break;
3900 case ISD::SSUBO:
3901 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3902 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3903 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3904 break;
3905 case ISD::USUBO:
3906 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3907 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3908 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3909 break;
3910 } // switch (...)
3911
3912 return std::make_pair(Value, OverflowCmp);
3913}
3914
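// Scalar sketch of the unsigned-overflow test that the UADDO case above
// encodes with CMP Value, LHS, for illustration only (assuming unsigned is
// 32 bits wide): an unsigned addition wrapped exactly when the truncated sum
// is below either operand.
static bool uaddWrapsSketch(unsigned LHS, unsigned RHS) {
  unsigned Value = LHS + RHS; // wraps modulo 2^32
  return Value < LHS;         // carry out of the add <=> wrapped result
}
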
3915SDValue
3916ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3917 // Let legalize expand this if it isn't a legal type yet.
3918 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3919 return SDValue();
3920
3921 SDValue Value, OverflowCmp;
3922 SDValue ARMcc;
3923 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3924 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3925 SDLoc dl(Op);
3926 // We use 0 and 1 as false and true values.
3927 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3928 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3929 EVT VT = Op.getValueType();
3930
3931 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3932 ARMcc, CCR, OverflowCmp);
3933
3934 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3935 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3936}
3937
3938SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3939 SDValue Cond = Op.getOperand(0);
3940 SDValue SelectTrue = Op.getOperand(1);
3941 SDValue SelectFalse = Op.getOperand(2);
3942 SDLoc dl(Op);
3943 unsigned Opc = Cond.getOpcode();
3944
3945 if (Cond.getResNo() == 1 &&
3946 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3947 Opc == ISD::USUBO)) {
3948 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3949 return SDValue();
3950
3951 SDValue Value, OverflowCmp;
3952 SDValue ARMcc;
3953 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3954 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3955 EVT VT = Op.getValueType();
3956
3957 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3958 OverflowCmp, DAG);
3959 }
3960
3961 // Convert:
3962 //
3963 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3964 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3965 //
3966 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3967 const ConstantSDNode *CMOVTrue =
3968 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3969 const ConstantSDNode *CMOVFalse =
3970 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3971
3972 if (CMOVTrue && CMOVFalse) {
3973 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3974 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3975
3976 SDValue True;
3977 SDValue False;
3978 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3979 True = SelectTrue;
3980 False = SelectFalse;
3981 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3982 True = SelectFalse;
3983 False = SelectTrue;
3984 }
3985
3986 if (True.getNode() && False.getNode()) {
3987 EVT VT = Op.getValueType();
3988 SDValue ARMcc = Cond.getOperand(2);
3989 SDValue CCR = Cond.getOperand(3);
3990 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3991 assert(True.getValueType() == VT);
3992 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
3993 }
3994 }
3995 }
3996
3997 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
3998 // undefined bits before doing a full-word comparison with zero.
3999 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
4000 DAG.getConstant(1, dl, Cond.getValueType()));
4001
4002 return DAG.getSelectCC(dl, Cond,
4003 DAG.getConstant(0, dl, Cond.getValueType()),
4004 SelectTrue, SelectFalse, ISD::SETNE);
4005}
4006
4007static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
4008 bool &swpCmpOps, bool &swpVselOps) {
4009 // Start by selecting the GE condition code for opcodes that return true for
4010 // 'equality'
4011 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4012 CC == ISD::SETULE)
4013 CondCode = ARMCC::GE;
4014
4015 // and GT for opcodes that return false for 'equality'.
4016 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4017 CC == ISD::SETULT)
4018 CondCode = ARMCC::GT;
4019
4020 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4021 // to swap the compare operands.
4022 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4023 CC == ISD::SETULT)
4024 swpCmpOps = true;
4025
4026 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4027 // If we have an unordered opcode, we need to swap the operands to the VSEL
4028 // instruction (effectively negating the condition).
4029 //
4030 // This also has the effect of swapping which one of 'less' or 'greater'
4031 // returns true, so we also swap the compare operands. It also switches
4032 // whether we return true for 'equality', so we compensate by picking the
4033 // opposite condition code to our original choice.
4034 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4035 CC == ISD::SETUGT) {
4036 swpCmpOps = !swpCmpOps;
4037 swpVselOps = !swpVselOps;
4038 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4039 }
4040
4041 // 'ordered' is 'anything but unordered', so use the VS condition code and
4042 // swap the VSEL operands.
4043 if (CC == ISD::SETO) {
4044 CondCode = ARMCC::VS;
4045 swpVselOps = true;
4046 }
4047
4048 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4049 // code and swap the VSEL operands.
4050 if (CC == ISD::SETUNE) {
4051 CondCode = ARMCC::EQ;
4052 swpVselOps = true;
4053 }
4054}
4055
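// Worked example of the rules above for ISD::SETULT: the opcode means "less"
// with no equality, so CondCode starts as GT; "less" also requests a compare
// operand swap; finally, the opcode is unordered, so both swaps flip and the
// condition toggles to GE. The net result is CondCode == GE with only the
// VSEL operands swapped, i.e. the VSEL picks the "true" value exactly when
// !(LHS >= RHS), which is "unordered or less than".
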
4056SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4057 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4058 SDValue Cmp, SelectionDAG &DAG) const {
4059 if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4060 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4061 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4062 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4063 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4064
4065 SDValue TrueLow = TrueVal.getValue(0);
4066 SDValue TrueHigh = TrueVal.getValue(1);
4067 SDValue FalseLow = FalseVal.getValue(0);
4068 SDValue FalseHigh = FalseVal.getValue(1);
4069
4070 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4071 ARMcc, CCR, Cmp);
4072 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4073 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4074
4075 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4076 } else {
4077 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4078 Cmp);
4079 }
4080}
4081
4082static bool isGTorGE(ISD::CondCode CC) {
4083 return CC == ISD::SETGT || CC == ISD::SETGE;
4084}
4085
4086static bool isLTorLE(ISD::CondCode CC) {
4087 return CC == ISD::SETLT || CC == ISD::SETLE;
4088}
4089
4090// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4091// All of these conditions (and their <= and >= counterparts) will do:
4092// x < k ? k : x
4093// x > k ? x : k
4094// k < x ? x : k
4095// k > x ? k : x
4096static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4097 const SDValue TrueVal, const SDValue FalseVal,
4098 const ISD::CondCode CC, const SDValue K) {
4099 return (isGTorGE(CC) &&
4100 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4101 (isLTorLE(CC) &&
4102 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4103}
4104
4105// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4106static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4107 const SDValue TrueVal, const SDValue FalseVal,
4108 const ISD::CondCode CC, const SDValue K) {
4109 return (isGTorGE(CC) &&
4110 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4111 (isLTorLE(CC) &&
4112 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4113}
4114
4115// Check if two chained conditionals could be converted into SSAT.
4116//
4117// SSAT can replace a set of two conditional selectors that bound a number to an
4118// interval of the form [~k, k] when k + 1 is a power of 2. Here are some examples:
4119//
4120// x < -k ? -k : (x > k ? k : x)
4121// x < -k ? -k : (x < k ? x : k)
4122// x > -k ? (x > k ? k : x) : -k
4123// x < k ? (x < -k ? -k : x) : k
4124// etc.
4125//
4126// It returns true if the conversion can be done, false otherwise.
4127// Additionally, the variable is returned in parameter V and the constant in K.
4128static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4129 uint64_t &K) {
4130 SDValue LHS1 = Op.getOperand(0);
4131 SDValue RHS1 = Op.getOperand(1);
4132 SDValue TrueVal1 = Op.getOperand(2);
4133 SDValue FalseVal1 = Op.getOperand(3);
4134 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4135
4136 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4137 if (Op2.getOpcode() != ISD::SELECT_CC)
4138 return false;
4139
4140 SDValue LHS2 = Op2.getOperand(0);
4141 SDValue RHS2 = Op2.getOperand(1);
4142 SDValue TrueVal2 = Op2.getOperand(2);
4143 SDValue FalseVal2 = Op2.getOperand(3);
4144 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4145
4146 // Find out which are the constants and which are the variables
4147 // in each conditional
4148 SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4149 ? &RHS1
4150 : nullptr;
4151 SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4152 ? &RHS2
4153 : nullptr;
4154 SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4155 SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4156 SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4157 SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4158
4159 // We must detect cases where the original operations worked with 16- or
4160 // 8-bit values. In such a case, V2Tmp != V2 because the comparison operations
4161 // must work with sign-extended values but the select operations return
4162 // the original non-extended value.
4163 SDValue V2TmpReg = V2Tmp;
4164 if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4165 V2TmpReg = V2Tmp->getOperand(0);
4166
4167 // Check that the registers and the constants have the correct values
4168 // in both conditionals
4169 if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4170 V2TmpReg != V2)
4171 return false;
4172
4173 // Figure out which conditional is saturating the lower/upper bound.
4174 const SDValue *LowerCheckOp =
4175 isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4176 ? &Op
4177 : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4178 ? &Op2
4179 : nullptr;
4180 const SDValue *UpperCheckOp =
4181 isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4182 ? &Op
4183 : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4184 ? &Op2
4185 : nullptr;
4186
4187 if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4188 return false;
4189
4190 // Check that the constant in the lower-bound check is
4191 // the opposite of the constant in the upper-bound check
4192 // in 1's complement.
4193 int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4194 int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4195 int64_t PosVal = std::max(Val1, Val2);
4196
4197 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4198 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4199 Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4200
4201 V = V2;
4202 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4203 return true;
4204 }
4205
4206 return false;
4207}
4208
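// Host-side sketch of the interval tested above, for illustration only: with
// k + 1 a power of two, the two chained selects clamp the value to [~k, k]
// (that is, [-(k + 1), k]), which is exactly what a single SSAT provides.
static long long clampForSSATSketch(long long x, long long k) {
  if (x > k)
    return k;   // upper-bound select
  if (x < ~k)
    return ~k;  // lower-bound select; ~k == -(k + 1) in two's complement
  return x;
}
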
4209SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4210 EVT VT = Op.getValueType();
4211 SDLoc dl(Op);
4212
4213 // Try to convert two saturating conditional selects into a single SSAT
4214 SDValue SatValue;
4215 uint64_t SatConstant;
4216 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4217 isSaturatingConditional(Op, SatValue, SatConstant))
4218 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4219 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4220
4221 SDValue LHS = Op.getOperand(0);
4222 SDValue RHS = Op.getOperand(1);
4223 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4224 SDValue TrueVal = Op.getOperand(2);
4225 SDValue FalseVal = Op.getOperand(3);
4226
4227 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4228 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4229 dl);
4230
4231 // If softenSetCCOperands only returned one value, we should compare it to
4232 // zero.
4233 if (!RHS.getNode()) {
4234 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4235 CC = ISD::SETNE;
4236 }
4237 }
4238
4239 if (LHS.getValueType() == MVT::i32) {
4240 // Try to generate VSEL on ARMv8.
4241 // The VSEL instruction can't use all the usual ARM condition
4242 // codes: it only has two bits to select the condition code, so it's
4243 // constrained to use only GE, GT, VS and EQ.
4244 //
4245 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4246 // swap the operands of the previous compare instruction (effectively
4247 // inverting the compare condition, swapping 'less' and 'greater') and
4248 // sometimes need to swap the operands to the VSEL (which inverts the
4249 // condition in the sense of firing whenever the previous condition didn't)
4250 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4251 TrueVal.getValueType() == MVT::f64)) {
4252 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4253 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4254 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4255 CC = ISD::getSetCCInverse(CC, true);
4256 std::swap(TrueVal, FalseVal);
4257 }
4258 }
4259
4260 SDValue ARMcc;
4261 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4262 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4263 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4264 }
4265
4266 ARMCC::CondCodes CondCode, CondCode2;
4267 bool InvalidOnQNaN;
4268 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4269
4270 // Try to generate VMAXNM/VMINNM on ARMv8.
4271 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4272 TrueVal.getValueType() == MVT::f64)) {
4273 bool swpCmpOps = false;
4274 bool swpVselOps = false;
4275 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4276
4277 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4278 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4279 if (swpCmpOps)
4280 std::swap(LHS, RHS);
4281 if (swpVselOps)
4282 std::swap(TrueVal, FalseVal);
4283 }
4284 }
4285
4286 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4287 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4288 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4289 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4290 if (CondCode2 != ARMCC::AL) {
4291 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4292 // FIXME: Needs another CMP because flag can have but one use.
4293 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4294 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4295 }
4296 return Result;
4297}
4298
4299/// canChangeToInt - Given the fp compare operand, return true if it is suitable
4300/// to morph to an integer compare sequence.
4301static bool canChangeToInt(SDValue Op, bool &SeenZero,
4302 const ARMSubtarget *Subtarget) {
4303 SDNode *N = Op.getNode();
4304 if (!N->hasOneUse())
4305 // Otherwise it requires moving the value from fp to integer registers.
4306 return false;
4307 if (!N->getNumValues())
4308 return false;
4309 EVT VT = Op.getValueType();
4310 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4311 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4312 // vmrs are very slow, e.g. cortex-a8.
4313 return false;
4314
4315 if (isFloatingPointZero(Op)) {
4316 SeenZero = true;
4317 return true;
4318 }
4319 return ISD::isNormalLoad(N);
4320}
4321
4322static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4323 if (isFloatingPointZero(Op))
4324 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4325
4326 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4327 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4328 Ld->getPointerInfo(), Ld->getAlignment(),
4329 Ld->getMemOperand()->getFlags());
4330
4331 llvm_unreachable("Unknown VFP cmp argument!");
4332}
4333
4334static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4335 SDValue &RetVal1, SDValue &RetVal2) {
4336 SDLoc dl(Op);
4337
4338 if (isFloatingPointZero(Op)) {
4339 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4340 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4341 return;
4342 }
4343
4344 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4345 SDValue Ptr = Ld->getBasePtr();
4346 RetVal1 =
4347 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4348 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4349
4350 EVT PtrType = Ptr.getValueType();
4351 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4352 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4353 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4354 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4355 Ld->getPointerInfo().getWithOffset(4), NewAlign,
4356 Ld->getMemOperand()->getFlags());
4357 return;
4358 }
4359
4360 llvm_unreachable("Unknown VFP cmp argument!");
4361}
4362
4363/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4364/// f32 and even f64 comparisons to integer ones.
4365SDValue
4366ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4367 SDValue Chain = Op.getOperand(0);
4368 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4369 SDValue LHS = Op.getOperand(2);
4370 SDValue RHS = Op.getOperand(3);
4371 SDValue Dest = Op.getOperand(4);
4372 SDLoc dl(Op);
4373
4374 bool LHSSeenZero = false;
4375 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4376 bool RHSSeenZero = false;
4377 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4378 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4379 // If unsafe fp math optimization is enabled and there are no other uses of
4380 // the CMP operands, and the condition code is EQ or NE, we can optimize it
4381 // to an integer comparison.
4382 if (CC == ISD::SETOEQ)
4383 CC = ISD::SETEQ;
4384 else if (CC == ISD::SETUNE)
4385 CC = ISD::SETNE;
4386
4387 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4388 SDValue ARMcc;
4389 if (LHS.getValueType() == MVT::f32) {
4390 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4391 bitcastf32Toi32(LHS, DAG), Mask);
4392 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4393 bitcastf32Toi32(RHS, DAG), Mask);
4394 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4395 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4396 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4397 Chain, Dest, ARMcc, CCR, Cmp);
4398 }
4399
4400 SDValue LHS1, LHS2;
4401 SDValue RHS1, RHS2;
4402 expandf64Toi32(LHS, DAG, LHS1, LHS2);
4403 expandf64Toi32(RHS, DAG, RHS1, RHS2);
4404 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4405 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4406 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4407 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4408 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4409 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4410 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4411 }
4412
4413 return SDValue();
4414}
4415
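// Scalar sketch of the masking trick used above, for illustration only: once
// the sign bit of an IEEE-754 single is cleared, an integer test against zero
// treats +0.0 and -0.0 identically, which is what the EQ/NE-only restriction
// relies on. Bits is assumed to hold the raw f32 bit pattern.
static bool isPlusOrMinusZeroSketch(unsigned Bits) {
  return (Bits & 0x7fffffffu) == 0; // true exactly for +0.0 and -0.0
}
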
4416SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4417 SDValue Chain = Op.getOperand(0);
4418 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4419 SDValue LHS = Op.getOperand(2);
4420 SDValue RHS = Op.getOperand(3);
4421 SDValue Dest = Op.getOperand(4);
4422 SDLoc dl(Op);
4423
4424 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4425 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4426 dl);
4427
4428 // If softenSetCCOperands only returned one value, we should compare it to
4429 // zero.
4430 if (!RHS.getNode()) {
4431 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4432 CC = ISD::SETNE;
4433 }
4434 }
4435
4436 if (LHS.getValueType() == MVT::i32) {
4437 SDValue ARMcc;
4438 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4439 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4440 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4441 Chain, Dest, ARMcc, CCR, Cmp);
4442 }
4443
4444 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4445
4446 if (getTargetMachine().Options.UnsafeFPMath &&
4447 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4448 CC == ISD::SETNE || CC == ISD::SETUNE)) {
4449 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4450 return Result;
4451 }
4452
4453 ARMCC::CondCodes CondCode, CondCode2;
4454 bool InvalidOnQNaN;
4455 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4456
4457 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4458 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4459 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4460 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4461 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4462 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4463 if (CondCode2 != ARMCC::AL) {
4464 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4465 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4466 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4467 }
4468 return Res;
4469}
4470
4471SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4472 SDValue Chain = Op.getOperand(0);
4473 SDValue Table = Op.getOperand(1);
4474 SDValue Index = Op.getOperand(2);
4475 SDLoc dl(Op);
4476
4477 EVT PTy = getPointerTy(DAG.getDataLayout());
4478 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4479 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4480 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4481 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4482 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
4483 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4484 // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table,
4485 // which does another jump to the destination. This also makes it easier
4486 // to translate it to TBB / TBH later (Thumb2 only).
4487 // FIXME: This might not work if the function is extremely large.
4488 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4489 Addr, Op.getOperand(2), JTI);
4490 }
4491 if (isPositionIndependent() || Subtarget->isROPI()) {
4492 Addr =
4493 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4494 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4495 Chain = Addr.getValue(1);
4496 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
4497 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4498 } else {
4499 Addr =
4500 DAG.getLoad(PTy, dl, Chain, Addr,
4501 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4502 Chain = Addr.getValue(1);
4503 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4504 }
4505}
4506
4507static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4508 EVT VT = Op.getValueType();
4509 SDLoc dl(Op);
4510
4511 if (Op.getValueType().getVectorElementType() == MVT::i32) {
4512 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4513 return Op;
4514 return DAG.UnrollVectorOp(Op.getNode());
4515 }
4516
4517 assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
4518 "Invalid type for custom lowering!");
4519 if (VT != MVT::v4i16)
4520 return DAG.UnrollVectorOp(Op.getNode());
4521
4522 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
4523 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4524}
4525
4526SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4527 EVT VT = Op.getValueType();
4528 if (VT.isVector())
4529 return LowerVectorFP_TO_INT(Op, DAG);
4530 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4531 RTLIB::Libcall LC;
4532 if (Op.getOpcode() == ISD::FP_TO_SINT)
4533 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4534 Op.getValueType());
4535 else
4536 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4537 Op.getValueType());
4538 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4539 /*isSigned*/ false, SDLoc(Op)).first;
4540 }
4541
4542 return Op;
4543}
4544
4545static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4546 EVT VT = Op.getValueType();
4547 SDLoc dl(Op);
4548
4549 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4550 if (VT.getVectorElementType() == MVT::f32)
4551 return Op;
4552 return DAG.UnrollVectorOp(Op.getNode());
4553 }
4554
4555 assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
4556 "Invalid type for custom lowering!");
4557 if (VT != MVT::v4f32)
4558 return DAG.UnrollVectorOp(Op.getNode());
4559
4560 unsigned CastOpc;
4561 unsigned Opc;
4562 switch (Op.getOpcode()) {
4563 default: llvm_unreachable("Invalid opcode!");
4564 case ISD::SINT_TO_FP:
4565 CastOpc = ISD::SIGN_EXTEND;
4566 Opc = ISD::SINT_TO_FP;
4567 break;
4568 case ISD::UINT_TO_FP:
4569 CastOpc = ISD::ZERO_EXTEND;
4570 Opc = ISD::UINT_TO_FP;
4571 break;
4572 }
4573
4574 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
4575 return DAG.getNode(Opc, dl, VT, Op);
4576}
4577
4578SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4579 EVT VT = Op.getValueType();
4580 if (VT.isVector())
4581 return LowerVectorINT_TO_FP(Op, DAG);
4582 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4583 RTLIB::Libcall LC;
4584 if (Op.getOpcode() == ISD::SINT_TO_FP)
4585 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4586 Op.getValueType());
4587 else
4588 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4589 Op.getValueType());
4590 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4591 /*isSigned*/ false, SDLoc(Op)).first;
4592 }
4593
4594 return Op;
4595}
4596
4597SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4598 // Implement fcopysign with a fabs and a conditional fneg.
4599 SDValue Tmp0 = Op.getOperand(0);
4600 SDValue Tmp1 = Op.getOperand(1);
4601 SDLoc dl(Op);
4602 EVT VT = Op.getValueType();
4603 EVT SrcVT = Tmp1.getValueType();
4604 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4605 Tmp0.getOpcode() == ARMISD::VMOVDRR;
4606 bool UseNEON = !InGPR && Subtarget->hasNEON();
4607
4608 if (UseNEON) {
4609 // Use VBSL to copy the sign bit.
4610 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4611 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4612 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4613 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4614 if (VT == MVT::f64)
4615 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4616 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4617 DAG.getConstant(32, dl, MVT::i32));
4618 else /*if (VT == MVT::f32)*/
4619 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4620 if (SrcVT == MVT::f32) {
4621 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4622 if (VT == MVT::f64)
4623 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4624 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4625 DAG.getConstant(32, dl, MVT::i32));
4626 } else if (VT == MVT::f32)
4627 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4628 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4629 DAG.getConstant(32, dl, MVT::i32));
4630 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4631 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4632
4633 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4634 dl, MVT::i32);
4635 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4636 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4637 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4638
4639 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4640 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4641 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4642 if (VT == MVT::f32) {
4643 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4644 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4645 DAG.getConstant(0, dl, MVT::i32));
4646 } else {
4647 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4648 }
4649
4650 return Res;
4651 }
4652
4653 // Bitcast operand 1 to i32.
4654 if (SrcVT == MVT::f64)
4655 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4656 Tmp1).getValue(1);
4657 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4658
4659 // Or in the signbit with integer operations.
4660 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4661 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4662 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4663 if (VT == MVT::f32) {
4664 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4665 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4666 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4667 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4668 }
4669
4670 // f64: Or the high part with signbit and then combine two parts.
4671 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4672 Tmp0);
4673 SDValue Lo = Tmp0.getValue(0);
4674 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4675 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4676 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4677}
4678
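// Scalar sketch of the integer sign-copy used in the non-NEON f32 path above,
// for illustration only; it assumes the usual IEEE-754 single layout and that
// std::memcpy (from <cstring>) is in scope for the bitcasts.
static float copySignSketch(float Mag, float Sgn) {
  unsigned M, S, R;
  std::memcpy(&M, &Mag, sizeof(M));
  std::memcpy(&S, &Sgn, sizeof(S));
  R = (M & 0x7fffffffu) | (S & 0x80000000u); // magnitude bits | sign bit
  std::memcpy(&Mag, &R, sizeof(R));
  return Mag;
}
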
4679SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4680 MachineFunction &MF = DAG.getMachineFunction();
4681 MachineFrameInfo &MFI = MF.getFrameInfo();
4682 MFI.setReturnAddressIsTaken(true);
4683
4684 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4685 return SDValue();
4686
4687 EVT VT = Op.getValueType();
4688 SDLoc dl(Op);
4689 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4690 if (Depth) {
4691 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4692 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4693 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4694 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4695 MachinePointerInfo());
4696 }
4697
4698 // Return LR, which contains the return address. Mark it an implicit live-in.
4699 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4700 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4701}
4702
4703SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4704 const ARMBaseRegisterInfo &ARI =
4705 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4706 MachineFunction &MF = DAG.getMachineFunction();
4707 MachineFrameInfo &MFI = MF.getFrameInfo();
4708 MFI.setFrameAddressIsTaken(true);
4709
4710 EVT VT = Op.getValueType();
4711 SDLoc dl(Op); // FIXME probably not meaningful
4712 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4713 unsigned FrameReg = ARI.getFrameRegister(MF);
4714 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4715 while (Depth--)
4716 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4717 MachinePointerInfo());
4718 return FrameAddr;
4719}
4720
4721// FIXME? Maybe this could be a TableGen attribute on some registers and
4722// this table could be generated automatically from RegInfo.
4723unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4724 SelectionDAG &DAG) const {
4725 unsigned Reg = StringSwitch<unsigned>(RegName)
4726 .Case("sp", ARM::SP)
4727 .Default(0);
4728 if (Reg)
4729 return Reg;
4730 report_fatal_error(Twine("Invalid register name \""
4731 + StringRef(RegName) + "\"."));
4732}
4733
4734// The result is a 64-bit value, so split it into two 32-bit values and
4735// return them as a pair of values.
4736static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4737 SelectionDAG &DAG) {
4738 SDLoc DL(N);
4739
4740 // This function is only supposed to be called for i64 type destination.
4741 assert(N->getValueType(0) == MVT::i64
4742 && "ExpandREAD_REGISTER called for non-i64 type result.");
4743
4744 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4745 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4746 N->getOperand(0),
4747 N->getOperand(1));
4748
4749 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4750 Read.getValue(1)));
4751 Results.push_back(Read.getOperand(0));
4752}
4753
4754/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
4755/// When \p DstVT, the destination type of \p BC, is on the vector
4756/// register bank and the source of bitcast, \p Op, operates on the same bank,
4757/// it might be possible to combine them, such that everything stays on the
4758/// vector register bank.
4759/// \return The node that would replace \p BC, if the combine
4760/// is possible.
4761static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
4762 SelectionDAG &DAG) {
4763 SDValue Op = BC->getOperand(0);
4764 EVT DstVT = BC->getValueType(0);
4765
4766 // The only vector instruction that can produce a scalar (remember,
4767 // since the bitcast was about to be turned into VMOVDRR, the source
4768 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
4769 // Moreover, we can do this combine only if there is one use.
4770 // Finally, if the destination type is not a vector, there is not
4771 // much point in forcing everything onto the vector bank.
4772 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4773 !Op.hasOneUse())
4774 return SDValue();
4775
4776 // If the index is not constant, we will introduce an additional
4777 // multiply that will stick.
4778 // Give up in that case.
4779 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4780 if (!Index)
4781 return SDValue();
4782 unsigned DstNumElt = DstVT.getVectorNumElements();
4783
4784 // Compute the new index.
4785 const APInt &APIntIndex = Index->getAPIntValue();
4786 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
4787 NewIndex *= APIntIndex;
4788 // Check if the new constant index fits into i32.
4789 if (NewIndex.getBitWidth() > 32)
4790 return SDValue();
4791
4792 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
4793 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
4794 SDLoc dl(Op);
4795 SDValue ExtractSrc = Op.getOperand(0);
4796 EVT VecVT = EVT::getVectorVT(
4797 *DAG.getContext(), DstVT.getScalarType(),
4798 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
4799 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
4800 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
4801 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
4802}
4803
4804/// ExpandBITCAST - If the target supports VFP, this function is called to
4805/// expand a bit convert where either the source or destination type is i64 to
4806/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4807/// operand type is illegal (e.g., v2f32 for a target that doesn't support
4808/// vectors), since the legalizer won't know what to do with that.
4809static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4810 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4811 SDLoc dl(N);
4812 SDValue Op = N->getOperand(0);
4813
4814 // This function is only supposed to be called for i64 types, either as the
4815 // source or destination of the bit convert.
4816 EVT SrcVT = Op.getValueType();
4817 EVT DstVT = N->getValueType(0);
4818 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4819 "ExpandBITCAST called for non-i64 type");
4820
4821 // Turn i64->f64 into VMOVDRR.
4822 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4823 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
4824 // if we can combine the bitcast with its source.
4825 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
4826 return Val;
4827
4828 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4829 DAG.getConstant(0, dl, MVT::i32));
4830 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4831 DAG.getConstant(1, dl, MVT::i32));
4832 return DAG.getNode(ISD::BITCAST, dl, DstVT,
4833 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4834 }
4835
4836 // Turn f64->i64 into VMOVRRD.
4837 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4838 SDValue Cvt;
4839 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4840 SrcVT.getVectorNumElements() > 1)
4841 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4842 DAG.getVTList(MVT::i32, MVT::i32),
4843 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4844 else
4845 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4846 DAG.getVTList(MVT::i32, MVT::i32), Op);
4847 // Merge the pieces into a single i64 value.
4848 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4849 }
4850
4851 return SDValue();
4852}
4853
4854/// getZeroVector - Returns a vector of specified type with all zero elements.
4855/// Zero vectors are used to represent vector negation and in those cases
4856/// will be implemented with the NEON VNEG instruction. However, VNEG does
4857/// not support i64 elements, so sometimes the zero vectors will need to be
4858/// explicitly constructed. Regardless, use a canonical VMOV to create the
4859/// zero vector.
4860static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4861 assert(VT.isVector() && "Expected a vector type");
4862 // The canonical modified immediate encoding of a zero vector is....0!
4863 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4864 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4865 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4866 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4867}
4868
4869/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
4870/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4871SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4872 SelectionDAG &DAG) const {
4873 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4874 EVT VT = Op.getValueType();
4875 unsigned VTBits = VT.getSizeInBits();
4876 SDLoc dl(Op);
4877 SDValue ShOpLo = Op.getOperand(0);
4878 SDValue ShOpHi = Op.getOperand(1);
4879 SDValue ShAmt = Op.getOperand(2);
4880 SDValue ARMcc;
4881 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4882 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4883
4884 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4885
4886 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4887 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4888 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4889 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4890 DAG.getConstant(VTBits, dl, MVT::i32));
4891 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4892 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4893 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4894 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4895 ISD::SETGE, ARMcc, DAG, dl);
4896 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
4897 ARMcc, CCR, CmpLo);
4898
4899
4900 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4901 SDValue HiBigShift = Opc == ISD::SRA
4902 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4903 DAG.getConstant(VTBits - 1, dl, VT))
4904 : DAG.getConstant(0, dl, VT);
4905 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4906 ISD::SETGE, ARMcc, DAG, dl);
4907 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4908 ARMcc, CCR, CmpHi);
4909
4910 SDValue Ops[2] = { Lo, Hi };
4911 return DAG.getMergeValues(Ops, dl);
4912}
4913
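// Host-side sketch of the SRL_PARTS decomposition above, for illustration
// only: a 64-bit value split into 32-bit halves (Lo, Hi) is shifted right by
// Amt in [1, 63]; the "small" and "big" shift cases mirror the two CMOV arms.
// The code assumes unsigned is 32 bits wide.
static void lsr64PartsSketch(unsigned &Lo, unsigned &Hi, unsigned Amt) {
  if (Amt < 32) {
    Lo = (Lo >> Amt) | (Hi << (32 - Amt)); // small shift: bits spill from Hi
    Hi = Hi >> Amt;
  } else {
    Lo = Hi >> (Amt - 32);                 // big shift: Lo comes from Hi only
    Hi = 0;                                // logical shift fills with zeroes
  }
}
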
4914/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4915/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4916SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4917 SelectionDAG &DAG) const {
4918 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4919 EVT VT = Op.getValueType();
4920 unsigned VTBits = VT.getSizeInBits();
4921 SDLoc dl(Op);
4922 SDValue ShOpLo = Op.getOperand(0);
4923 SDValue ShOpHi = Op.getOperand(1);
4924 SDValue ShAmt = Op.getOperand(2);
4925 SDValue ARMcc;
4926 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4927
4928 assert(Op.getOpcode() == ISD::SHL_PARTS);
4929 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4930 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4931 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4932 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4933 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4934
4935 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4936 DAG.getConstant(VTBits, dl, MVT::i32));
4937 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4938 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4939 ISD::SETGE, ARMcc, DAG, dl);
4940 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4941 ARMcc, CCR, CmpHi);
4942
4943 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4944 ISD::SETGE, ARMcc, DAG, dl);
4945 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4946 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
4947 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
4948
4949 SDValue Ops[2] = { Lo, Hi };
4950 return DAG.getMergeValues(Ops, dl);
4951}
4952
4953SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4954 SelectionDAG &DAG) const {
4955 // The rounding mode is in bits 23:22 of the FPSCR.
4956 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4957 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
4958 // so that the shift + and get folded into a bitfield extract.
4959 SDLoc dl(Op);
4960 SDValue Ops[] = { DAG.getEntryNode(),
4961 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
4962
4963 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
4964 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4965 DAG.getConstant(1U << 22, dl, MVT::i32));
4966 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4967 DAG.getConstant(22, dl, MVT::i32));
4968 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4969 DAG.getConstant(3, dl, MVT::i32));
4970}
4971
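// Host-side sketch of the FLT_ROUNDS extraction above, for illustration only:
// given the raw FPSCR value, adding 1 << 22 before the shift rotates the ARM
// rounding-mode encoding (0,1,2,3) into the FLT_ROUNDS encoding (1,2,3,0).
static unsigned fltRoundsFromFPSCRSketch(unsigned FPSCR) {
  return ((FPSCR + (1u << 22)) >> 22) & 3;
}
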
4972static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4973 const ARMSubtarget *ST) {
4974 SDLoc dl(N);
4975 EVT VT = N->getValueType(0);
4976 if (VT.isVector()) {
4977 assert(ST->hasNEON());
4978
4979 // Compute the least significant set bit: LSB = X & -X
4980 SDValue X = N->getOperand(0);
4981 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
4982 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
4983
4984 EVT ElemTy = VT.getVectorElementType();
4985
4986 if (ElemTy == MVT::i8) {
4987 // Compute with: cttz(x) = ctpop(lsb - 1)
4988 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4989 DAG.getTargetConstant(1, dl, ElemTy));
4990 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
4991 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
4992 }
4993
4994 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
4995 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
4996 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
4997 unsigned NumBits = ElemTy.getSizeInBits();
4998 SDValue WidthMinus1 =
4999 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5000 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
5001 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
5002 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
5003 }
5004
5005 // Compute with: cttz(x) = ctpop(lsb - 1)
5006
5007 // Since we can only compute the number of bits in a byte with vcnt.8, we
5008 // have to gather the result with pairwise addition (vpaddl) for i16, i32,
5009 // and i64.
5010
5011 // Compute LSB - 1.
5012 SDValue Bits;
5013 if (ElemTy == MVT::i64) {
5014 // Load constant 0xffff'ffff'ffff'ffff to register.
5015 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5016 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5017 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5018 } else {
5019 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5020 DAG.getTargetConstant(1, dl, ElemTy));
5021 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5022 }
5023
5024 // Count #bits with vcnt.8.
5025 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5026 SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
5027 SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
5028
5029 // Gather the #bits with vpaddl (pairwise add.)
5030 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5031 SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
5032 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5033 Cnt8);
5034 if (ElemTy == MVT::i16)
5035 return Cnt16;
5036
5037 EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
5038 SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
5039 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5040 Cnt16);
5041 if (ElemTy == MVT::i32)
5042 return Cnt32;
5043
5044     assert(ElemTy == MVT::i64);
5045 SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5046 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5047 Cnt32);
5048 return Cnt64;
5049 }
5050
5051 if (!ST->hasV6T2Ops())
5052 return SDValue();
5053
5054 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5055 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5056}
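[Editorial aside] A standalone sketch of the scalar identity the vector path above relies on: for x != 0, cttz(x) == popcount((x & -x) - 1). Illustrative only, not part of this file; it uses the GCC/Clang __builtin_popcount for brevity.

#include <cassert>
#include <cstdint>

static unsigned cttzViaPopcount(uint32_t X) {
  uint32_t LSB = X & (0u - X);          // isolate the least significant set bit
  return __builtin_popcount(LSB - 1u);  // the ones below it count the trailing zeros
}

int main() {
  assert(cttzViaPopcount(0x28u) == 3);        // 0b101000 has three trailing zeros
  assert(cttzViaPopcount(1u) == 0);
  assert(cttzViaPopcount(0x80000000u) == 31);
  return 0;
}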
5057
5058/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
5059/// for each 16-bit element from operand, repeated. The basic idea is to
5060/// leverage vcnt to get the 8-bit counts, gather and add the results.
5061///
5062/// Trace for v4i16:
5063/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5064/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
5065/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
5066/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
5067/// [b0 b1 b2 b3 b4 b5 b6 b7]
5068/// +[b1 b0 b3 b2 b5 b4 b7 b6]
5069/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
5070/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
5071static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
5072 EVT VT = N->getValueType(0);
5073 SDLoc DL(N);
5074
5075 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5076 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
5077 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
5078 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
5079 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
5080 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
5081}
5082
5083/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
5084/// bit-count for each 16-bit element from the operand. We need slightly
5085/// different sequencing for v4i16 and v8i16 to stay within NEON's available
5086/// 64/128-bit registers.
5087///
5088/// Trace for v4i16:
5089/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5090/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
5091/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
5092/// v4i16:Extracted = [k0 k1 k2 k3 ]
5093static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
5094 EVT VT = N->getValueType(0);
5095 SDLoc DL(N);
5096
5097 SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
5098 if (VT.is64BitVector()) {
5099 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
5100 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
5101 DAG.getIntPtrConstant(0, DL));
5102 } else {
5103 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
5104 BitCounts, DAG.getIntPtrConstant(0, DL));
5105 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
5106 }
5107}
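[Editorial aside] A minimal standalone sketch of the per-lane arithmetic that vcnt.8 plus pairwise addition (or the VREV16/ADD/VUZP sequence above) reproduces for one 16-bit element. Illustrative only; __builtin_popcount stands in for vcnt.8.

#include <cassert>
#include <cstdint>

static unsigned popcount16ViaBytes(uint16_t V) {
  unsigned LoByte = __builtin_popcount(V & 0xffu);         // per-byte count, like vcnt.8
  unsigned HiByte = __builtin_popcount((V >> 8) & 0xffu);
  return LoByte + HiByte;                                  // the pairwise sum per 16-bit lane
}

int main() {
  assert(popcount16ViaBytes(0x00ff) == 8);
  assert(popcount16ViaBytes(0xf0f0) == 8);
  assert(popcount16ViaBytes(0x8001) == 2);
  return 0;
}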
5108
5109/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
5110/// bit-count for each 32-bit element from the operand. The idea here is
5111/// to split the vector into 16-bit elements, leverage the 16-bit count
5112/// routine, and then combine the results.
5113///
5114/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
5115/// input = [v0 v1 ] (vi: 32-bit elements)
5116/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
5117/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
5118/// vrev: N0 = [k1 k0 k3 k2 ]
5119/// [k0 k1 k2 k3 ]
5120/// N1 =+[k1 k0 k3 k2 ]
5121/// [k0 k2 k1 k3 ]
5122/// N2 =+[k1 k3 k0 k2 ]
5123/// [k0 k2 k1 k3 ]
5124/// Extended =+[k1 k3 k0 k2 ]
5125/// [k0 k2 ]
5126/// Extracted=+[k1 k3 ]
5127///
5128static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
5129 EVT VT = N->getValueType(0);
5130 SDLoc DL(N);
5131
5132 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5133
5134 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
5135 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
5136 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
5137 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
5138 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
5139
5140 if (VT.is64BitVector()) {
5141 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
5142 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
5143 DAG.getIntPtrConstant(0, DL));
5144 } else {
5145 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
5146 DAG.getIntPtrConstant(0, DL));
5147 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
5148 }
5149}
5150
5151static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5152 const ARMSubtarget *ST) {
5153 EVT VT = N->getValueType(0);
5154
5155   assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5156   assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
5157           VT == MVT::v4i16 || VT == MVT::v8i16) &&
5158          "Unexpected type for custom ctpop lowering");
5159
5160 if (VT.getVectorElementType() == MVT::i32)
5161 return lowerCTPOP32BitElements(N, DAG);
5162 else
5163 return lowerCTPOP16BitElements(N, DAG);
5164}
5165
5166static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5167 const ARMSubtarget *ST) {
5168 EVT VT = N->getValueType(0);
5169 SDLoc dl(N);
5170
5171 if (!VT.isVector())
5172 return SDValue();
5173
5174 // Lower vector shifts on NEON to use VSHL.
5175   assert(ST->hasNEON() && "unexpected vector shift");
5176
5177 // Left shifts translate directly to the vshiftu intrinsic.
5178 if (N->getOpcode() == ISD::SHL)
5179 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5180 DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5181 MVT::i32),
5182 N->getOperand(0), N->getOperand(1));
5183
5184   assert((N->getOpcode() == ISD::SRA ||
5185           N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5186
5187 // NEON uses the same intrinsics for both left and right shifts. For
5188 // right shifts, the shift amounts are negative, so negate the vector of
5189 // shift amounts.
5190 EVT ShiftVT = N->getOperand(1).getValueType();
5191 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5192 getZeroVector(ShiftVT, DAG, dl),
5193 N->getOperand(1));
5194 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5195 Intrinsic::arm_neon_vshifts :
5196 Intrinsic::arm_neon_vshiftu);
5197 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5198 DAG.getConstant(vshiftInt, dl, MVT::i32),
5199 N->getOperand(0), NegatedCount);
5200}
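[Editorial aside] The sign of the per-lane amount is what selects the direction for NEON VSHL, which is why the right-shift cases above negate the shift amounts. A minimal scalar analogue of the unsigned flavour, illustrative only:

#include <cassert>
#include <cstdint>

static uint32_t vshlLikeU(uint32_t X, int Amt) {
  return Amt >= 0 ? X << Amt : X >> -Amt;  // a negative amount shifts right (vshiftu)
}

int main() {
  assert(vshlLikeU(0x80u, 3) == 0x400u);   // plain left shift
  assert(vshlLikeU(0x80u, -3) == 0x10u);   // SRL by 3, expressed as a shift by -3
  return 0;
}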
5201
5202static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5203 const ARMSubtarget *ST) {
5204 EVT VT = N->getValueType(0);
5205 SDLoc dl(N);
5206
5207 // We can get here for a node like i32 = ISD::SHL i32, i64
5208 if (VT != MVT::i64)
5209 return SDValue();
5210
5211   assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5212          "Unknown shift to lower!");
5213
5214   // We only lower SRA and SRL of 1 here; all others use generic lowering.
5215 if (!isOneConstant(N->getOperand(1)))
5216 return SDValue();
5217
5218 // If we are in thumb mode, we don't have RRX.
5219 if (ST->isThumb1Only()) return SDValue();
5220
5221 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5222 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5223 DAG.getConstant(0, dl, MVT::i32));
5224 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5225 DAG.getConstant(1, dl, MVT::i32));
5226
5227 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5228 // captures the result into a carry flag.
5229 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5230 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5231
5232 // The low part is an ARMISD::RRX operand, which shifts the carry in.
5233 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5234
5235 // Merge the pieces into a single i64 value.
5236 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5237}
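[Editorial aside] A standalone sketch of what the SRL_FLAG/RRX pair above computes for a 64-bit logical shift right by one, assuming the usual Lo/Hi split of the i64 value. Illustrative only; the SRA case differs only in how the top word is shifted.

#include <cassert>
#include <cstdint>

static uint64_t lshr64By1(uint32_t Lo, uint32_t Hi) {
  uint32_t Carry = Hi & 1u;                    // SRL_FLAG: shift Hi right, capture bit 0
  uint32_t NewHi = Hi >> 1;
  uint32_t NewLo = (Lo >> 1) | (Carry << 31);  // RRX: rotate the captured carry into bit 31
  return ((uint64_t)NewHi << 32) | NewLo;
}

int main() {
  assert(lshr64By1(0x00000001u, 0x00000001u) == 0x0000000080000000ull);
  assert(lshr64By1(0xffffffffu, 0x00000000u) == 0x000000007fffffffull);
  return 0;
}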
5238
5239static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5240 SDValue TmpOp0, TmpOp1;
5241 bool Invert = false;
5242 bool Swap = false;
5243 unsigned Opc = 0;
5244
5245 SDValue Op0 = Op.getOperand(0);
5246 SDValue Op1 = Op.getOperand(1);
5247 SDValue CC = Op.getOperand(2);
5248 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5249 EVT VT = Op.getValueType();
5250 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5251 SDLoc dl(Op);
5252
5253 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5254 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5255 // Special-case integer 64-bit equality comparisons. They aren't legal,
5256 // but they can be lowered with a few vector instructions.
5257 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5258 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5259 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5260 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5261 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5262 DAG.getCondCode(ISD::SETEQ));
5263 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5264 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5265 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5266 if (SetCCOpcode == ISD::SETNE)
5267 Merged = DAG.getNOT(dl, Merged, CmpVT);
5268 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5269 return Merged;
5270 }
5271
5272 if (CmpVT.getVectorElementType() == MVT::i64)
5273 // 64-bit comparisons are not legal in general.
5274 return SDValue();
5275
5276 if (Op1.getValueType().isFloatingPoint()) {
5277 switch (SetCCOpcode) {
5278     default: llvm_unreachable("Illegal FP comparison");
5279 case ISD::SETUNE:
5280     case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5281 case ISD::SETOEQ:
5282 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5283 case ISD::SETOLT:
5284     case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5285 case ISD::SETOGT:
5286 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5287 case ISD::SETOLE:
5288     case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5289 case ISD::SETOGE:
5290 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5291     case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5292 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5293     case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5294 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5295     case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5296 case ISD::SETONE:
5297 // Expand this to (OLT | OGT).
5298 TmpOp0 = Op0;
5299 TmpOp1 = Op1;
5300 Opc = ISD::OR;
5301 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5302 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5303 break;
5304 case ISD::SETUO:
5305 Invert = true;
5306       LLVM_FALLTHROUGH;
5307 case ISD::SETO:
5308 // Expand this to (OLT | OGE).
5309 TmpOp0 = Op0;
5310 TmpOp1 = Op1;
5311 Opc = ISD::OR;
5312 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5313 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5314 break;
5315 }
5316 } else {
5317 // Integer comparisons.
5318 switch (SetCCOpcode) {
5319     default: llvm_unreachable("Illegal integer comparison");
5320 case ISD::SETNE: Invert = true;
5321 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5322 case ISD::SETLT: Swap = true;
5323 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5324 case ISD::SETLE: Swap = true;
5325 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5326 case ISD::SETULT: Swap = true;
5327 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5328 case ISD::SETULE: Swap = true;
5329 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5330 }
5331
5332 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5333 if (Opc == ARMISD::VCEQ) {
5334
5335 SDValue AndOp;
5336 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5337 AndOp = Op0;
5338 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5339 AndOp = Op1;
5340
5341 // Ignore bitconvert.
5342 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5343 AndOp = AndOp.getOperand(0);
5344
5345 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5346 Opc = ARMISD::VTST;
5347 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5348 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5349 Invert = !Invert;
5350 }
5351 }
5352 }
5353
5354 if (Swap)
5355 std::swap(Op0, Op1);
5356
5357 // If one of the operands is a constant vector zero, attempt to fold the
5358 // comparison to a specialized compare-against-zero form.
5359 SDValue SingleOp;
5360 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5361 SingleOp = Op0;
5362 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5363 if (Opc == ARMISD::VCGE)
5364 Opc = ARMISD::VCLEZ;
5365 else if (Opc == ARMISD::VCGT)
5366 Opc = ARMISD::VCLTZ;
5367 SingleOp = Op1;
5368 }
5369
5370 SDValue Result;
5371 if (SingleOp.getNode()) {
5372 switch (Opc) {
5373 case ARMISD::VCEQ:
5374 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5375 case ARMISD::VCGE:
5376 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5377 case ARMISD::VCLEZ:
5378 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5379 case ARMISD::VCGT:
5380 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5381 case ARMISD::VCLTZ:
5382 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5383 default:
5384 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5385 }
5386 } else {
5387 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5388 }
5389
5390 Result = DAG.getSExtOrTrunc(Result, dl, VT);
5391
5392 if (Invert)
5393 Result = DAG.getNOT(dl, Result, VT);
5394
5395 return Result;
5396}
5397
5398static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
5399 SDValue LHS = Op.getOperand(0);
5400 SDValue RHS = Op.getOperand(1);
5401 SDValue Carry = Op.getOperand(2);
5402 SDValue Cond = Op.getOperand(3);
5403 SDLoc DL(Op);
5404
5405   assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
5406
5407   assert(Carry.getOpcode() != ISD::CARRY_FALSE);
5408 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5409 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5410
5411 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5412 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5413 SDValue ARMcc = DAG.getConstant(
5414 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5415 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5416 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5417 Cmp.getValue(1), SDValue());
5418 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5419 CCR, Chain.getValue(1));
5420}
5421
5422/// isNEONModifiedImm - Check if the specified splat value corresponds to a
5423/// valid vector constant for a NEON instruction with a "modified immediate"
5424/// operand (e.g., VMOV). If so, return the encoded value.
5425static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5426 unsigned SplatBitSize, SelectionDAG &DAG,
5427 const SDLoc &dl, EVT &VT, bool is128Bits,
5428 NEONModImmType type) {
5429 unsigned OpCmode, Imm;
5430
5431 // SplatBitSize is set to the smallest size that splats the vector, so a
5432 // zero vector will always have SplatBitSize == 8. However, NEON modified
5433 // immediate instructions other than VMOV do not support the 8-bit encoding
5434 // of a zero vector, and the default encoding of zero is supposed to be the
5435 // 32-bit version.
5436 if (SplatBits == 0)
5437 SplatBitSize = 32;
5438
5439 switch (SplatBitSize) {
5440 case 8:
5441 if (type != VMOVModImm)
5442 return SDValue();
5443 // Any 1-byte value is OK. Op=0, Cmode=1110.
5444     assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5445 OpCmode = 0xe;
5446 Imm = SplatBits;
5447 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5448 break;
5449
5450 case 16:
5451 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5452 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5453 if ((SplatBits & ~0xff) == 0) {
5454 // Value = 0x00nn: Op=x, Cmode=100x.
5455 OpCmode = 0x8;
5456 Imm = SplatBits;
5457 break;
5458 }
5459 if ((SplatBits & ~0xff00) == 0) {
5460 // Value = 0xnn00: Op=x, Cmode=101x.
5461 OpCmode = 0xa;
5462 Imm = SplatBits >> 8;
5463 break;
5464 }
5465 return SDValue();
5466
5467 case 32:
5468 // NEON's 32-bit VMOV supports splat values where:
5469 // * only one byte is nonzero, or
5470 // * the least significant byte is 0xff and the second byte is nonzero, or
5471 // * the least significant 2 bytes are 0xff and the third is nonzero.
5472 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5473 if ((SplatBits & ~0xff) == 0) {
5474 // Value = 0x000000nn: Op=x, Cmode=000x.
5475 OpCmode = 0;
5476 Imm = SplatBits;
5477 break;
5478 }
5479 if ((SplatBits & ~0xff00) == 0) {
5480 // Value = 0x0000nn00: Op=x, Cmode=001x.
5481 OpCmode = 0x2;
5482 Imm = SplatBits >> 8;
5483 break;
5484 }
5485 if ((SplatBits & ~0xff0000) == 0) {
5486 // Value = 0x00nn0000: Op=x, Cmode=010x.
5487 OpCmode = 0x4;
5488 Imm = SplatBits >> 16;
5489 break;
5490 }
5491 if ((SplatBits & ~0xff000000) == 0) {
5492 // Value = 0xnn000000: Op=x, Cmode=011x.
5493 OpCmode = 0x6;
5494 Imm = SplatBits >> 24;
5495 break;
5496 }
5497
5498 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5499 if (type == OtherModImm) return SDValue();
5500
5501 if ((SplatBits & ~0xffff) == 0 &&
5502 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5503 // Value = 0x0000nnff: Op=x, Cmode=1100.
5504 OpCmode = 0xc;
5505 Imm = SplatBits >> 8;
5506 break;
5507 }
5508
5509 if ((SplatBits & ~0xffffff) == 0 &&
5510 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5511 // Value = 0x00nnffff: Op=x, Cmode=1101.
5512 OpCmode = 0xd;
5513 Imm = SplatBits >> 16;
5514 break;
5515 }
5516
5517 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5518 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5519 // VMOV.I32. A (very) minor optimization would be to replicate the value
5520 // and fall through here to test for a valid 64-bit splat. But, then the
5521 // caller would also need to check and handle the change in size.
5522 return SDValue();
5523
5524 case 64: {
5525 if (type != VMOVModImm)
5526 return SDValue();
5527 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5528 uint64_t BitMask = 0xff;
5529 uint64_t Val = 0;
5530 unsigned ImmMask = 1;
5531 Imm = 0;
5532 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5533 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5534 Val |= BitMask;
5535 Imm |= ImmMask;
5536 } else if ((SplatBits & BitMask) != 0) {
5537 return SDValue();
5538 }
5539 BitMask <<= 8;
5540 ImmMask <<= 1;
5541 }
5542
5543 if (DAG.getDataLayout().isBigEndian())
5544       // Swap the higher and lower 32-bit words.
5545 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5546
5547 // Op=1, Cmode=1110.
5548 OpCmode = 0x1e;
5549 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5550 break;
5551 }
5552
5553 default:
5554     llvm_unreachable("unexpected size for isNEONModifiedImm");
5555 }
5556
5557 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5558 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5559}
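[Editorial aside] A minimal standalone reproduction of the 16-bit classification above: which byte of the splat is nonzero picks the Op/Cmode nibble and the 8-bit immediate. Illustrative only; the function name and layout below are local to this sketch, not part of the LLVM API.

#include <cassert>
#include <cstdint>

static bool classify16BitSplat(uint32_t Splat, unsigned &OpCmode, unsigned &Imm) {
  if ((Splat & ~0xffu) == 0)   { OpCmode = 0x8; Imm = Splat;      return true; } // 0x00nn
  if ((Splat & ~0xff00u) == 0) { OpCmode = 0xa; Imm = Splat >> 8; return true; } // 0xnn00
  return false;
}

int main() {
  unsigned OpCmode, Imm;
  assert(classify16BitSplat(0x003a, OpCmode, Imm) && OpCmode == 0x8 && Imm == 0x3a);
  assert(classify16BitSplat(0x3a00, OpCmode, Imm) && OpCmode == 0xa && Imm == 0x3a);
  assert(!classify16BitSplat(0x3a3a, OpCmode, Imm)); // two nonzero bytes: no encoding
  return 0;
}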
5560
5561SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5562 const ARMSubtarget *ST) const {
5563 bool IsDouble = Op.getValueType() == MVT::f64;
5564 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5565 const APFloat &FPVal = CFP->getValueAPF();
5566
5567 // Prevent floating-point constants from using literal loads
5568 // when execute-only is enabled.
5569 if (ST->genExecuteOnly()) {
5570 APInt INTVal = FPVal.bitcastToAPInt();
5571 SDLoc DL(CFP);
5572 if (IsDouble) {
5573 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5574 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5575 if (!ST->isLittle())
5576 std::swap(Lo, Hi);
5577 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5578 } else {
5579 return DAG.getConstant(INTVal, DL, MVT::i32);
5580 }
5581 }
5582
5583 if (!ST->hasVFP3())
5584 return SDValue();
5585
5586 // Use the default (constant pool) lowering for double constants when we have
5587   // an SP-only FPU.
5588 if (IsDouble && Subtarget->isFPOnlySP())
5589 return SDValue();
5590
5591 // Try splatting with a VMOV.f32...
5592 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5593
5594 if (ImmVal != -1) {
5595 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5596 // We have code in place to select a valid ConstantFP already, no need to
5597 // do any mangling.
5598 return Op;
5599 }
5600
5601 // It's a float and we are trying to use NEON operations where
5602 // possible. Lower it to a splat followed by an extract.
5603 SDLoc DL(Op);
5604 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5605 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5606 NewVal);
5607 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5608 DAG.getConstant(0, DL, MVT::i32));
5609 }
5610
5611   // The rest of our options are NEON only; make sure that's allowed before
5612   // proceeding.
5613 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5614 return SDValue();
5615
5616 EVT VMovVT;
5617 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5618
5619 // It wouldn't really be worth bothering for doubles except for one very
5620 // important value, which does happen to match: 0.0. So make sure we don't do
5621 // anything stupid.
5622 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5623 return SDValue();
5624
5625 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5626 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5627 VMovVT, false, VMOVModImm);
5628 if (NewVal != SDValue()) {
5629 SDLoc DL(Op);
5630 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5631 NewVal);
5632 if (IsDouble)
5633 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5634
5635 // It's a float: cast and extract a vector element.
5636 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5637 VecConstant);
5638 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5639 DAG.getConstant(0, DL, MVT::i32));
5640 }
5641
5642 // Finally, try a VMVN.i32
5643 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5644 false, VMVNModImm);
5645 if (NewVal != SDValue()) {
5646 SDLoc DL(Op);
5647 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5648
5649 if (IsDouble)
5650 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5651
5652 // It's a float: cast and extract a vector element.
5653 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5654 VecConstant);
5655 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5656 DAG.getConstant(0, DL, MVT::i32));
5657 }
5658
5659 return SDValue();
5660}
5661
5662 // Check if a VEXT instruction can handle the shuffle mask when the
5663// vector sources of the shuffle are the same.
5664static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5665 unsigned NumElts = VT.getVectorNumElements();
5666
5667 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5668 if (M[0] < 0)
5669 return false;
5670
5671 Imm = M[0];
5672
5673 // If this is a VEXT shuffle, the immediate value is the index of the first
5674 // element. The other shuffle indices must be the successive elements after
5675 // the first one.
5676 unsigned ExpectedElt = Imm;
5677 for (unsigned i = 1; i < NumElts; ++i) {
5678 // Increment the expected index. If it wraps around, just follow it
5679 // back to index zero and keep going.
5680 ++ExpectedElt;
5681 if (ExpectedElt == NumElts)
5682 ExpectedElt = 0;
5683
5684 if (M[i] < 0) continue; // ignore UNDEF indices
5685 if (ExpectedElt != static_cast<unsigned>(M[i]))
5686 return false;
5687 }
5688
5689 return true;
5690}
5691
5692static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5693 bool &ReverseVEXT, unsigned &Imm) {
5694 unsigned NumElts = VT.getVectorNumElements();
5695 ReverseVEXT = false;
5696
5697 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5698 if (M[0] < 0)
5699 return false;
5700
5701 Imm = M[0];
5702
5703 // If this is a VEXT shuffle, the immediate value is the index of the first
5704 // element. The other shuffle indices must be the successive elements after
5705 // the first one.
5706 unsigned ExpectedElt = Imm;
5707 for (unsigned i = 1; i < NumElts; ++i) {
5708 // Increment the expected index. If it wraps around, it may still be
5709 // a VEXT but the source vectors must be swapped.
5710 ExpectedElt += 1;
5711 if (ExpectedElt == NumElts * 2) {
5712 ExpectedElt = 0;
5713 ReverseVEXT = true;
5714 }
5715
5716 if (M[i] < 0) continue; // ignore UNDEF indices
5717 if (ExpectedElt != static_cast<unsigned>(M[i]))
5718 return false;
5719 }
5720
5721 // Adjust the index value if the source operands will be swapped.
5722 if (ReverseVEXT)
5723 Imm -= NumElts;
5724
5725 return true;
5726}
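[Editorial aside] Two hand-checked v8i8 masks (NumElts = 8) and what isVEXTMask derives from them, given purely for illustration:
  <2, 3, 4, 5, 6, 7, 8, 9>        -> no wrap past 2*NumElts: ReverseVEXT = false, Imm = 2
  <10, 11, 12, 13, 14, 15, 0, 1>  -> wraps past 2*NumElts, so the sources are swapped:
                                     ReverseVEXT = true, Imm = 10 - 8 = 2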
5727
5728/// isVREVMask - Check if a vector shuffle corresponds to a VREV
5729/// instruction with the specified blocksize. (The order of the elements
5730/// within each block of the vector is reversed.)
5731static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5732   assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5733          "Only possible block sizes for VREV are: 16, 32, 64");
5734
5735 unsigned EltSz = VT.getScalarSizeInBits();
5736 if (EltSz == 64)
5737 return false;
5738
5739 unsigned NumElts = VT.getVectorNumElements();
5740 unsigned BlockElts = M[0] + 1;
5741 // If the first shuffle index is UNDEF, be optimistic.
5742 if (M[0] < 0)
5743 BlockElts = BlockSize / EltSz;
5744
5745 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5746 return false;
5747
5748 for (unsigned i = 0; i < NumElts; ++i) {
5749 if (M[i] < 0) continue; // ignore UNDEF indices
5750 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5751 return false;
5752 }
5753
5754 return true;
5755}
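[Editorial aside] A hand-checked example, for illustration only: for v8i16 with BlockSize 32, each 32-bit block holds two 16-bit elements (BlockElts = 2), so the mask of a VREV32.16 is <1, 0, 3, 2, 5, 4, 7, 6>. Every element maps to (i - i%2) + (1 - i%2), which is exactly the test performed in the loop above.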
5756
5757static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5758 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5759 // range, then 0 is placed into the resulting vector. So pretty much any mask
5760 // of 8 elements can work here.
5761 return VT == MVT::v8i8 && M.size() == 8;
5762}
5763
5764// Checks whether the shuffle mask represents a vector transpose (VTRN) by
5765// checking that pairs of elements in the shuffle mask represent the same index
5766// in each vector, incrementing the expected index by 2 at each step.
5767// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
5768// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
5769// v2={e,f,g,h}
5770// WhichResult gives the offset for each element in the mask based on which
5771// of the two results it belongs to.
5772//
5773// The transpose can be represented either as:
5774// result1 = shufflevector v1, v2, result1_shuffle_mask
5775// result2 = shufflevector v1, v2, result2_shuffle_mask
5776// where v1/v2 and the shuffle masks have the same number of elements
5777// (here WhichResult (see below) indicates which result is being checked)
5778//
5779// or as:
5780// results = shufflevector v1, v2, shuffle_mask
5781// where both results are returned in one vector and the shuffle mask has twice
5782 // as many elements as v1/v2 (here WhichResult will always be 0 if true). In this
5783 // case we check the low half and the high half of the shuffle mask as if each
5784 // were a mask of the first form.
5785static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5786 unsigned EltSz = VT.getScalarSizeInBits();
5787 if (EltSz == 64)
5788 return false;
5789
5790 unsigned NumElts = VT.getVectorNumElements();
5791 if (M.size() != NumElts && M.size() != NumElts*2)
5792 return false;
5793
5794 // If the mask is twice as long as the input vector then we need to check the
5795 // upper and lower parts of the mask with a matching value for WhichResult
5796 // FIXME: A mask with only even values will be rejected in case the first
5797 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
5798 // M[0] is used to determine WhichResult
5799 for (unsigned i = 0; i < M.size(); i += NumElts) {
5800 if (M.size() == NumElts * 2)
5801 WhichResult = i / NumElts;
5802 else
5803 WhichResult = M[i] == 0 ? 0 : 1;
5804 for (unsigned j = 0; j < NumElts; j += 2) {
5805 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5806 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
5807 return false;
5808 }
5809 }
5810
5811 if (M.size() == NumElts*2)
5812 WhichResult = 0;
5813
5814 return true;
5815}
5816
5817/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5818/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5819/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5820static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5821 unsigned EltSz = VT.getScalarSizeInBits();
5822 if (EltSz == 64)
5823 return false;
5824
5825 unsigned NumElts = VT.getVectorNumElements();
5826 if (M.size() != NumElts && M.size() != NumElts*2)
5827 return false;
5828
5829 for (unsigned i = 0; i < M.size(); i += NumElts) {
5830 if (M.size() == NumElts * 2)
5831 WhichResult = i / NumElts;
5832 else
5833 WhichResult = M[i] == 0 ? 0 : 1;
5834 for (unsigned j = 0; j < NumElts; j += 2) {
5835 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5836 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
5837 return false;
5838 }
5839 }
5840
5841 if (M.size() == NumElts*2)
5842 WhichResult = 0;
5843
5844 return true;
5845}
5846
5847// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
5848// that the mask elements are either all even and in steps of size 2 or all odd
5849// and in steps of size 2.
5850// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
5851// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
5852// v2={e,f,g,h}
5853 // Requires checks similar to those of isVTRNMask with
5854 // respect to how the results are returned.
5855static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5856 unsigned EltSz = VT.getScalarSizeInBits();
5857 if (EltSz == 64)
5858 return false;
5859
5860 unsigned NumElts = VT.getVectorNumElements();
5861 if (M.size() != NumElts && M.size() != NumElts*2)
5862 return false;
5863
5864 for (unsigned i = 0; i < M.size(); i += NumElts) {
5865 WhichResult = M[i] == 0 ? 0 : 1;
5866 for (unsigned j = 0; j < NumElts; ++j) {
5867 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
5868 return false;
5869 }
5870 }
5871
5872 if (M.size() == NumElts*2)
5873 WhichResult = 0;
5874
5875 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5876 if (VT.is64BitVector() && EltSz == 32)
5877 return false;
5878
5879 return true;
5880}
5881
5882/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5883/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5884 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5885static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5886 unsigned EltSz = VT.getScalarSizeInBits();
5887 if (EltSz == 64)
5888 return false;
5889
5890 unsigned NumElts = VT.getVectorNumElements();
5891 if (M.size() != NumElts && M.size() != NumElts*2)
5892 return false;
5893
5894 unsigned Half = NumElts / 2;
5895 for (unsigned i = 0; i < M.size(); i += NumElts) {
5896 WhichResult = M[i] == 0 ? 0 : 1;
5897 for (unsigned j = 0; j < NumElts; j += Half) {
5898 unsigned Idx = WhichResult;
5899 for (unsigned k = 0; k < Half; ++k) {
5900 int MIdx = M[i + j + k];
5901 if (MIdx >= 0 && (unsigned) MIdx != Idx)
5902 return false;
5903 Idx += 2;
5904 }
5905 }
5906 }
5907
5908 if (M.size() == NumElts*2)
5909 WhichResult = 0;
5910
5911 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5912 if (VT.is64BitVector() && EltSz == 32)
5913 return false;
5914
5915 return true;
5916}
5917
5918// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
5919// that pairs of elements of the shufflemask represent the same index in each
5920// vector incrementing sequentially through the vectors.
5921// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
5922// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
5923// v2={e,f,g,h}
5924 // Requires checks similar to those of isVTRNMask with respect to how the
5925 // results are returned.
5926static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5927 unsigned EltSz = VT.getScalarSizeInBits();
5928 if (EltSz == 64)
5929 return false;
5930
5931 unsigned NumElts = VT.getVectorNumElements();
5932 if (M.size() != NumElts && M.size() != NumElts*2)
5933 return false;
5934
5935 for (unsigned i = 0; i < M.size(); i += NumElts) {
5936 WhichResult = M[i] == 0 ? 0 : 1;
5937 unsigned Idx = WhichResult * NumElts / 2;
5938 for (unsigned j = 0; j < NumElts; j += 2) {
5939 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5940 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
5941 return false;
5942 Idx += 1;
5943 }
5944 }
5945
5946 if (M.size() == NumElts*2)
5947 WhichResult = 0;
5948
5949 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5950 if (VT.is64BitVector() && EltSz == 32)
5951 return false;
5952
5953 return true;
5954}
5955
5956/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5957/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5958/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5959static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5960 unsigned EltSz = VT.getScalarSizeInBits();
5961 if (EltSz == 64)
5962 return false;
5963
5964 unsigned NumElts = VT.getVectorNumElements();
5965 if (M.size() != NumElts && M.size() != NumElts*2)
5966 return false;
5967
5968 for (unsigned i = 0; i < M.size(); i += NumElts) {
5969 WhichResult = M[i] == 0 ? 0 : 1;
5970 unsigned Idx = WhichResult * NumElts / 2;
5971 for (unsigned j = 0; j < NumElts; j += 2) {
5972 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5973 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
5974 return false;
5975 Idx += 1;
5976 }
5977 }
5978
5979 if (M.size() == NumElts*2)
5980 WhichResult = 0;
5981
5982 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5983 if (VT.is64BitVector() && EltSz == 32)
5984 return false;
5985
5986 return true;
5987}
5988
5989/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
5990/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
5991static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
5992 unsigned &WhichResult,
5993 bool &isV_UNDEF) {
5994 isV_UNDEF = false;
5995 if (isVTRNMask(ShuffleMask, VT, WhichResult))
5996 return ARMISD::VTRN;
5997 if (isVUZPMask(ShuffleMask, VT, WhichResult))
5998 return ARMISD::VUZP;
5999 if (isVZIPMask(ShuffleMask, VT, WhichResult))
6000 return ARMISD::VZIP;
6001
6002 isV_UNDEF = true;
6003 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
6004 return ARMISD::VTRN;
6005 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6006 return ARMISD::VUZP;
6007 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
6008 return ARMISD::VZIP;
6009
6010 return 0;
6011}
6012
6013 /// \return true if this is a reverse operation on a vector.
6014static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6015 unsigned NumElts = VT.getVectorNumElements();
6016 // Make sure the mask has the right size.
6017 if (NumElts != M.size())
6018 return false;
6019
6020 // Look for <15, ..., 3, -1, 1, 0>.
6021 for (unsigned i = 0; i != NumElts; ++i)
6022 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6023 return false;
6024
6025 return true;
6026}
6027
6028// If N is an integer constant that can be moved into a register in one
6029// instruction, return an SDValue of such a constant (will become a MOV
6030// instruction). Otherwise return null.
6031static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6032 const ARMSubtarget *ST, const SDLoc &dl) {
6033 uint64_t Val;
6034 if (!isa<ConstantSDNode>(N))
6035 return SDValue();
6036 Val = cast<ConstantSDNode>(N)->getZExtValue();
6037
6038 if (ST->isThumb1Only()) {
6039 if (Val <= 255 || ~Val <= 255)
6040 return DAG.getConstant(Val, dl, MVT::i32);
6041 } else {
6042 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6043 return DAG.getConstant(Val, dl, MVT::i32);
6044 }
6045 return SDValue();
6046}
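[Editorial aside] A standalone sketch of the Thumb1 test above, assuming 32-bit values for simplicity: a constant can be materialized in one instruction when it, or its bitwise complement, fits in 8 bits (MOV vs. MVN). The ARM-mode path instead accepts any value with a valid so_imm rotated-immediate encoding. Illustrative only, not part of this file.

#include <cassert>
#include <cstdint>

static bool thumb1SingleInstrImm(uint32_t Val) {
  return Val <= 255u || ~Val <= 255u;
}

int main() {
  assert(thumb1SingleInstrImm(200u));          // small immediate: single MOV
  assert(thumb1SingleInstrImm(0xffffff0fu));   // complement is 0xf0, so MVN works
  assert(!thumb1SingleInstrImm(0x00ab0000u));  // needs the ARM rotated form (or more insns)
  return 0;
}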
6047
6048// If this is a case we can't handle, return null and let the default
6049// expansion code take care of it.
6050SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6051 const ARMSubtarget *ST) const {
6052 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6053 SDLoc dl(Op);
6054 EVT VT = Op.getValueType();
6055
6056 APInt SplatBits, SplatUndef;
6057 unsigned SplatBitSize;
6058 bool HasAnyUndefs;
6059 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6060 if (SplatUndef.isAllOnesValue())
6061 return DAG.getUNDEF(VT);
6062
6063 if (SplatBitSize <= 64) {
6064 // Check if an immediate VMOV works.
6065 EVT VmovVT;
6066 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6067 SplatUndef.getZExtValue(), SplatBitSize,
6068 DAG, dl, VmovVT, VT.is128BitVector(),
6069 VMOVModImm);
6070 if (Val.getNode()) {
6071 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6072 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6073 }
6074
6075 // Try an immediate VMVN.
6076 uint64_t NegatedImm = (~SplatBits).getZExtValue();
6077 Val = isNEONModifiedImm(NegatedImm,
6078 SplatUndef.getZExtValue(), SplatBitSize,
6079 DAG, dl, VmovVT, VT.is128BitVector(),
6080 VMVNModImm);
6081 if (Val.getNode()) {
6082 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6083 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6084 }
6085
6086 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6087 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6088 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6089 if (ImmVal != -1) {
6090 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6091 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6092 }
6093 }
6094 }
6095 }
6096
6097 // Scan through the operands to see if only one value is used.
6098 //
6099 // As an optimisation, even if more than one value is used it may be more
6100 // profitable to splat with one value then change some lanes.
6101 //
6102 // Heuristically we decide to do this if the vector has a "dominant" value,
6103 // defined as splatted to more than half of the lanes.
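// (Editorial aside, illustrative only.) For a v4i32 build_vector {a, a, a, b},
// the value 'a' fills three of the four lanes, so it is "dominant": the code
// below emits a single VDUP of 'a' and then one INSERT_VECTOR_ELT to patch in
// 'b', instead of inserting all four elements individually.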
6104 unsigned NumElts = VT.getVectorNumElements();
6105 bool isOnlyLowElement = true;
6106 bool usesOnlyOneValue = true;
6107 bool hasDominantValue = false;
6108 bool isConstant = true;
6109
6110 // Map of the number of times a particular SDValue appears in the
6111 // element list.
6112 DenseMap<SDValue, unsigned> ValueCounts;
6113 SDValue Value;
6114 for (unsigned i = 0; i < NumElts; ++i) {
6115 SDValue V = Op.getOperand(i);
6116 if (V.isUndef())
6117 continue;
6118 if (i > 0)
6119 isOnlyLowElement = false;
6120 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6121 isConstant = false;
6122
6123 ValueCounts.insert(std::make_pair(V, 0));
6124 unsigned &Count = ValueCounts[V];
6125
6126 // Is this value dominant? (takes up more than half of the lanes)
6127 if (++Count > (NumElts / 2)) {
6128 hasDominantValue = true;
6129 Value = V;
6130 }
6131 }
6132 if (ValueCounts.size() != 1)
6133 usesOnlyOneValue = false;
6134 if (!Value.getNode() && !ValueCounts.empty())
6135 Value = ValueCounts.begin()->first;
6136
6137 if (ValueCounts.empty())
6138 return DAG.getUNDEF(VT);
6139
6140 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6141 // Keep going if we are hitting this case.
6142 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6143 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6144
6145 unsigned EltSize = VT.getScalarSizeInBits();
6146
6147 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6148 // i32 and try again.
6149 if (hasDominantValue && EltSize <= 32) {
6150 if (!isConstant) {
6151 SDValue N;
6152
6153 // If we are VDUPing a value that comes directly from a vector, that will
6154 // cause an unnecessary move to and from a GPR, where instead we could
6155 // just use VDUPLANE. We can only do this if the lane being extracted
6156 // is at a constant index, as the VDUP from lane instructions only have
6157 // constant-index forms.
6158 ConstantSDNode *constIndex;
6159 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6160 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6161 // We need to create a new undef vector to use for the VDUPLANE if the
6162 // size of the vector from which we get the value is different than the
6163 // size of the vector that we need to create. We will insert the element
6164 // such that the register coalescer will remove unnecessary copies.
6165 if (VT != Value->getOperand(0).getValueType()) {
6166 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6167 VT.getVectorNumElements();
6168 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6169 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6170 Value, DAG.getConstant(index, dl, MVT::i32)),
6171 DAG.getConstant(index, dl, MVT::i32));
6172 } else
6173 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6174 Value->getOperand(0), Value->getOperand(1));
6175 } else
6176 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6177
6178 if (!usesOnlyOneValue) {
6179 // The dominant value was splatted as 'N', but we now have to insert
6180 // all differing elements.
6181 for (unsigned I = 0; I < NumElts; ++I) {
6182 if (Op.getOperand(I) == Value)
6183 continue;
6184 SmallVector<SDValue, 3> Ops;
6185 Ops.push_back(N);
6186 Ops.push_back(Op.getOperand(I));
6187 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6188 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6189 }
6190 }
6191 return N;
6192 }
6193 if (VT.getVectorElementType().isFloatingPoint()) {
6194 SmallVector<SDValue, 8> Ops;
6195 for (unsigned i = 0; i < NumElts; ++i)
6196 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6197 Op.getOperand(i)));
6198 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6199 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6200 Val = LowerBUILD_VECTOR(Val, DAG, ST);
6201 if (Val.getNode())
6202 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6203 }
6204 if (usesOnlyOneValue) {
6205 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6206 if (isConstant && Val.getNode())
6207 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6208 }
6209 }
6210
6211 // If all elements are constants and the case above didn't get hit, fall back
6212 // to the default expansion, which will generate a load from the constant
6213 // pool.
6214 if (isConstant)
6215 return SDValue();
6216
6217 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6218 if (NumElts >= 4) {
6219 SDValue shuffle = ReconstructShuffle(Op, DAG);
6220 if (shuffle != SDValue())
6221 return shuffle;
6222 }
6223
6224 if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6225 // If we haven't found an efficient lowering, try splitting a 128-bit vector
6226 // into two 64-bit vectors; we might discover a better way to lower it.
6227 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6228 EVT ExtVT = VT.getVectorElementType();
6229 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6230 SDValue Lower =
6231 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6232 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6233 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6234 SDValue Upper = DAG.getBuildVector(
6235 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6236 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6237 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6238 if (Lower && Upper)
6239 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6240 }
6241
6242 // Vectors with 32- or 64-bit elements can be built by directly assigning
6243 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6244 // will be legalized.
6245 if (EltSize >= 32) {
6246 // Do the expansion with floating-point types, since that is what the VFP
6247 // registers are defined to use, and since i64 is not legal.
6248 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6249 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6250 SmallVector<SDValue, 8> Ops;
6251 for (unsigned i = 0; i < NumElts; ++i)
6252 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6253 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6254 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6255 }
6256
6257 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6258 // know the default expansion would otherwise fall back on something even
6259 // worse. For a vector with one or two non-undef values, that's
6260 // scalar_to_vector for the elements followed by a shuffle (provided the
6261 // shuffle is valid for the target) and materialization element by element
6262 // on the stack followed by a load for everything else.
6263 if (!isConstant && !usesOnlyOneValue) {
6264 SDValue Vec = DAG.getUNDEF(VT);
6265 for (unsigned i = 0 ; i < NumElts; ++i) {
6266 SDValue V = Op.getOperand(i);
6267 if (V.isUndef())
6268 continue;
6269 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6270 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6271 }
6272 return Vec;
6273 }
6274
6275 return SDValue();
6276}
6277
6278// Gather data to see if the operation can be modelled as a
6279// shuffle in combination with VEXTs.
6280SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6281 SelectionDAG &DAG) const {
6282   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6283 SDLoc dl(Op);
6284 EVT VT = Op.getValueType();
6285 unsigned NumElts = VT.getVectorNumElements();
6286
6287 struct ShuffleSourceInfo {
6288 SDValue Vec;
6289 unsigned MinElt = std::numeric_limits<unsigned>::max();
6290 unsigned MaxElt = 0;
6291
6292 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6293 // be compatible with the shuffle we intend to construct. As a result
6294 // ShuffleVec will be some sliding window into the original Vec.
6295 SDValue ShuffleVec;
6296
6297     // Code should guarantee that element i in Vec starts at element
6298     // "WindowBase + i * WindowScale" in ShuffleVec.
6299 int WindowBase = 0;
6300 int WindowScale = 1;
6301
6302 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6303
6304 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6305 };
6306
6307 // First gather all vectors used as an immediate source for this BUILD_VECTOR
6308 // node.
6309 SmallVector<ShuffleSourceInfo, 2> Sources;
6310 for (unsigned i = 0; i < NumElts; ++i) {
6311 SDValue V = Op.getOperand(i);
6312 if (V.isUndef())
6313 continue;
6314 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6315 // A shuffle can only come from building a vector from various
6316 // elements of other vectors.
6317 return SDValue();
6318 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6319 // Furthermore, shuffles require a constant mask, whereas extractelts
6320 // accept variable indices.
6321 return SDValue();
6322 }
6323
6324 // Add this element source to the list if it's not already there.
6325 SDValue SourceVec = V.getOperand(0);
6326 auto Source = llvm::find(Sources, SourceVec);
6327 if (Source == Sources.end())
6328 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6329
6330 // Update the minimum and maximum lane number seen.
6331 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6332 Source->MinElt = std::min(Source->MinElt, EltNo);
6333 Source->MaxElt = std::max(Source->MaxElt, EltNo);
6334 }
6335
6336 // Currently only do something sane when at most two source vectors
6337 // are involved.
6338 if (Sources.size() > 2)
6339 return SDValue();
6340
6341 // Find out the smallest element size among result and two sources, and use
6342 // it as element size to build the shuffle_vector.
6343 EVT SmallestEltTy = VT.getVectorElementType();
6344 for (auto &Source : Sources) {
6345 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6346 if (SrcEltTy.bitsLT(SmallestEltTy))
6347 SmallestEltTy = SrcEltTy;
6348 }
6349 unsigned ResMultiplier =
6350 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6351 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6352 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6353
6354 // If the source vector is too wide or too narrow, we may nevertheless be able
6355 // to construct a compatible shuffle either by concatenating it with UNDEF or
6356 // extracting a suitable range of elements.
6357 for (auto &Src : Sources) {
6358 EVT SrcVT = Src.ShuffleVec.getValueType();
6359
6360 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6361 continue;
6362
6363 // This stage of the search produces a source with the same element type as
6364 // the original, but with a total width matching the BUILD_VECTOR output.
6365 EVT EltVT = SrcVT.getVectorElementType();
6366 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6367 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6368
6369 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6370 if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6371 return SDValue();
6372 // We can pad out the smaller vector for free, so if it's part of a
6373 // shuffle...
6374 Src.ShuffleVec =
6375 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6376 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6377 continue;
6378 }
6379
6380 if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6381 return SDValue();
6382
6383 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6384 // Span too large for a VEXT to cope
6385 return SDValue();
6386 }
6387
6388 if (Src.MinElt >= NumSrcElts) {
6389 // The extraction can just take the second half
6390 Src.ShuffleVec =
6391 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6392 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6393 Src.WindowBase = -NumSrcElts;
6394 } else if (Src.MaxElt < NumSrcElts) {
6395 // The extraction can just take the first half
6396 Src.ShuffleVec =
6397 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6398 DAG.getConstant(0, dl, MVT::i32));
6399 } else {
6400 // An actual VEXT is needed
6401 SDValue VEXTSrc1 =
6402 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6403 DAG.getConstant(0, dl, MVT::i32));
6404 SDValue VEXTSrc2 =
6405 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6406 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6407
6408 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6409 VEXTSrc2,
6410 DAG.getConstant(Src.MinElt, dl, MVT::i32));
6411 Src.WindowBase = -Src.MinElt;
6412 }
6413 }
6414
6415 // Another possible incompatibility occurs from the vector element types. We
6416 // can fix this by bitcasting the source vectors to the same type we intend
6417 // for the shuffle.
6418 for (auto &Src : Sources) {
6419 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6420 if (SrcEltTy == SmallestEltTy)
6421 continue;
6422 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6423 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6424 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6425 Src.WindowBase *= Src.WindowScale;
6426 }
6427
6428 // Final sanity check before we try to actually produce a shuffle.
6429 DEBUG(
6430 for (auto Src : Sources)
6431 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
6432 );
6433
6434 // The stars all align, our next step is to produce the mask for the shuffle.
6435 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6436 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6437 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6438 SDValue Entry = Op.getOperand(i);
6439 if (Entry.isUndef())
6440 continue;
6441
6442 auto Src = llvm::find(Sources, Entry.getOperand(0));
6443 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6444
6445 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6446 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6447 // segment.
6448 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6449 int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6450 VT.getScalarSizeInBits());
6451 int LanesDefined = BitsDefined / BitsPerShuffleLane;
6452
6453 // This source is expected to fill ResMultiplier lanes of the final shuffle,
6454 // starting at the appropriate offset.
6455 int *LaneMask = &Mask[i * ResMultiplier];
6456
6457 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6458 ExtractBase += NumElts * (Src - Sources.begin());
6459 for (int j = 0; j < LanesDefined; ++j)
6460 LaneMask[j] = ExtractBase + j;
6461 }
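// A worked example of the mask computation above, assuming SmallestEltTy is
// i8 (so NumElts == 16 for a 128-bit shuffle) and an operand extracted from
// lane 3 of the second source, whose ShuffleVec was bitcast from i16 elements
// (WindowScale == 2, WindowBase == 0): ExtractBase = 3*2 + 0 + 16*1 = 22, and
// with 16 defined bits it fills two i8 shuffle lanes, {22, 23}.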
6462
6463 // Final check before we try to produce nonsense...
6464 if (!isShuffleMaskLegal(Mask, ShuffleVT))
6465 return SDValue();
6466
6467 // We can't handle more than two sources. This should have already
6468 // been checked before this point.
6469 assert(Sources.size() <= 2 && "Too many sources!");
6470
6471 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6472 for (unsigned i = 0; i < Sources.size(); ++i)
6473 ShuffleOps[i] = Sources[i].ShuffleVec;
6474
6475 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6476 ShuffleOps[1], Mask);
6477 return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6478}
6479
6480/// isShuffleMaskLegal - Targets can use this to indicate that they only
6481/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6482/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6483/// are assumed to be legal.
6484bool
6485ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
6486 EVT VT) const {
6487 if (VT.getVectorNumElements() == 4 &&
6488 (VT.is128BitVector() || VT.is64BitVector())) {
6489 unsigned PFIndexes[4];
6490 for (unsigned i = 0; i != 4; ++i) {
6491 if (M[i] < 0)
6492 PFIndexes[i] = 8;
6493 else
6494 PFIndexes[i] = M[i];
6495 }
6496
6497 // Compute the index in the perfect shuffle table.
6498 unsigned PFTableIndex =
6499 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6500 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6501 unsigned Cost = (PFEntry >> 30);
6502
6503 if (Cost <= 4)
6504 return true;
6505 }
6506
6507 bool ReverseVEXT, isV_UNDEF;
6508 unsigned Imm, WhichResult;
6509
6510 unsigned EltSize = VT.getScalarSizeInBits();
6511 return (EltSize >= 32 ||
6512 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6513 isVREVMask(M, VT, 64) ||
6514 isVREVMask(M, VT, 32) ||
6515 isVREVMask(M, VT, 16) ||
6516 isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6517 isVTBLMask(M, VT) ||
6518 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6519 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6520}
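// For reference, the perfect-shuffle index above is a base-9 encoding: each
// of the four lanes contributes a digit 0-7 (or 8 for an undef lane), so the
// mask <2,3,0,1> maps to PFTableIndex = 2*729 + 3*81 + 0*9 + 1 = 1702, and
// the table needs 9*9*9*9 = 6561 entries in total.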
6521
6522/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6523/// the specified operations to build the shuffle.
6524static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6525 SDValue RHS, SelectionDAG &DAG,
6526 const SDLoc &dl) {
6527 unsigned OpNum = (PFEntry >> 26) & 0x0F;
6528 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6529 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
6530
6531 enum {
6532 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6533 OP_VREV,
6534 OP_VDUP0,
6535 OP_VDUP1,
6536 OP_VDUP2,
6537 OP_VDUP3,
6538 OP_VEXT1,
6539 OP_VEXT2,
6540 OP_VEXT3,
6541 OP_VUZPL, // VUZP, left result
6542 OP_VUZPR, // VUZP, right result
6543 OP_VZIPL, // VZIP, left result
6544 OP_VZIPR, // VZIP, right result
6545 OP_VTRNL, // VTRN, left result
6546 OP_VTRNR // VTRN, right result
6547 };
6548
6549 if (OpNum == OP_COPY) {
6550 if (LHSID == (1*9+2)*9+3) return LHS;
6551 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6552 return RHS;
6553 }
6554
6555 SDValue OpLHS, OpRHS;
6556 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6557 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6558 EVT VT = OpLHS.getValueType();
6559
6560 switch (OpNum) {
6561 default: llvm_unreachable("Unknown shuffle opcode!");
6562 case OP_VREV:
6563 // VREV divides the vector in half and swaps within the half.
6564 if (VT.getVectorElementType() == MVT::i32 ||
6565 VT.getVectorElementType() == MVT::f32)
6566 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6567 // vrev <4 x i16> -> VREV32
6568 if (VT.getVectorElementType() == MVT::i16)
6569 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6570 // vrev <4 x i8> -> VREV16
6571 assert(VT.getVectorElementType() == MVT::i8);
6572 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6573 case OP_VDUP0:
6574 case OP_VDUP1:
6575 case OP_VDUP2:
6576 case OP_VDUP3:
6577 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6578 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6579 case OP_VEXT1:
6580 case OP_VEXT2:
6581 case OP_VEXT3:
6582 return DAG.getNode(ARMISD::VEXT, dl, VT,
6583 OpLHS, OpRHS,
6584 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6585 case OP_VUZPL:
6586 case OP_VUZPR:
6587 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6588 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6589 case OP_VZIPL:
6590 case OP_VZIPR:
6591 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6592 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6593 case OP_VTRNL:
6594 case OP_VTRNR:
6595 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6596 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6597 }
6598}
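// The PFEntry layout decoded above, restated: bits 31-30 hold the cost,
// bits 29-26 the shuffle opcode (OpNum), bits 25-13 the LHS table index and
// bits 12-0 the RHS table index, i.e.
//   Cost  = PFEntry >> 30;
//   OpNum = (PFEntry >> 26) & 0x0F;
//   LHSID = (PFEntry >> 13) & 0x1FFF;
//   RHSID = PFEntry & 0x1FFF;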
6599
6600static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6601 ArrayRef<int> ShuffleMask,
6602 SelectionDAG &DAG) {
6603 // Check to see if we can use the VTBL instruction.
6604 SDValue V1 = Op.getOperand(0);
6605 SDValue V2 = Op.getOperand(1);
6606 SDLoc DL(Op);
6607
6608 SmallVector<SDValue, 8> VTBLMask;
6609 for (ArrayRef<int>::iterator
6610 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6611 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6612
6613 if (V2.getNode()->isUndef())
6614 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6615 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6616
6617 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6618 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6619}
6620
6621static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6622 SelectionDAG &DAG) {
6623 SDLoc DL(Op);
6624 SDValue OpLHS = Op.getOperand(0);
6625 EVT VT = OpLHS.getValueType();
6626
6627 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6628 "Expect an v8i16/v16i8 type");
6629 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6630 // For a v16i8 type: after the VREV64, we have <7, ..., 0, 15, ..., 8>. Now,
6631 // extract the first 8 bytes into the top double word and the last 8 bytes
6632 // into the bottom double word. The v8i16 case is similar.
6633 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6634 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6635 DAG.getConstant(ExtractNum, DL, MVT::i32));
6636}
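// A lane-by-lane sketch for the v8i16 case, assuming input lanes <0..7>:
// VREV64 reverses within each 64-bit half, giving <3,2,1,0,7,6,5,4>, and the
// VEXT with ExtractNum == 4 then rotates by four lanes, producing the fully
// reversed <7,6,5,4,3,2,1,0>.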
6637
6638static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6639 SDValue V1 = Op.getOperand(0);
6640 SDValue V2 = Op.getOperand(1);
6641 SDLoc dl(Op);
6642 EVT VT = Op.getValueType();
6643 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6644
6645 // Convert shuffles that are directly supported on NEON to target-specific
6646 // DAG nodes, instead of keeping them as shuffles and matching them again
6647 // during code selection. This is more efficient and avoids the possibility
6648 // of inconsistencies between legalization and selection.
6649 // FIXME: floating-point vectors should be canonicalized to integer vectors
6650 // of the same type so that they get CSEd properly.
6651 ArrayRef<int> ShuffleMask = SVN->getMask();
6652
6653 unsigned EltSize = VT.getScalarSizeInBits();
6654 if (EltSize <= 32) {
6655 if (SVN->isSplat()) {
6656 int Lane = SVN->getSplatIndex();
6657 // If this is undef splat, generate it via "just" vdup, if possible.
6658 if (Lane == -1) Lane = 0;
6659
6660 // Test if V1 is a SCALAR_TO_VECTOR.
6661 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6662 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6663 }
6664 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6665 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6666 // reaches it).
6667 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6668 !isa<ConstantSDNode>(V1.getOperand(0))) {
6669 bool IsScalarToVector = true;
6670 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6671 if (!V1.getOperand(i).isUndef()) {
6672 IsScalarToVector = false;
6673 break;
6674 }
6675 if (IsScalarToVector)
6676 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6677 }
6678 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6679 DAG.getConstant(Lane, dl, MVT::i32));
6680 }
6681
6682 bool ReverseVEXT;
6683 unsigned Imm;
6684 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
6685 if (ReverseVEXT)
6686 std::swap(V1, V2);
6687 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6688 DAG.getConstant(Imm, dl, MVT::i32));
6689 }
6690
6691 if (isVREVMask(ShuffleMask, VT, 64))
6692 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6693 if (isVREVMask(ShuffleMask, VT, 32))
6694 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6695 if (isVREVMask(ShuffleMask, VT, 16))
6696 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6697
6698 if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6699 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6700 DAG.getConstant(Imm, dl, MVT::i32));
6701 }
6702
6703 // Check for Neon shuffles that modify both input vectors in place.
6704 // If both results are used, i.e., if there are two shuffles with the same
6705 // source operands and with masks corresponding to both results of one of
6706 // these operations, DAG memoization will ensure that a single node is
6707 // used for both shuffles.
6708 unsigned WhichResult;
6709 bool isV_UNDEF;
6710 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6711 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
6712 if (isV_UNDEF)
6713 V2 = V1;
6714 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
6715 .getValue(WhichResult);
6716 }
6717
6718 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
6719 // shuffles that produce a result larger than their operands with:
6720 // shuffle(concat(v1, undef), concat(v2, undef))
6721 // ->
6722 // shuffle(concat(v1, v2), undef)
6723 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
6724 //
6725 // This is useful in the general case, but there are special cases where
6726 // native shuffles produce larger results: the two-result ops.
6727 //
6728 // Look through the concat when lowering them:
6729 // shuffle(concat(v1, v2), undef)
6730 // ->
6731 // concat(VZIP(v1, v2):0, :1)
6732 //
6733 if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
6734 SDValue SubV1 = V1->getOperand(0);
6735 SDValue SubV2 = V1->getOperand(1);
6736 EVT SubVT = SubV1.getValueType();
6737
6738 // We expect these to have been canonicalized to -1.
6739 assert(llvm::all_of(ShuffleMask, [&](int i) {
6740 return i < (int)VT.getVectorNumElements();
6741 }) && "Unexpected shuffle index into UNDEF operand!");
6742
6743 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6744 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
6745 if (isV_UNDEF)
6746 SubV2 = SubV1;
6747 assert((WhichResult == 0) &&
6748 "In-place shuffle of concat can only have one result!");
6749 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
6750 SubV1, SubV2);
6751 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
6752 Res.getValue(1));
6753 }
6754 }
6755 }
6756
6757 // If the shuffle is not directly supported and it has 4 elements, use
6758 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6759 unsigned NumElts = VT.getVectorNumElements();
6760 if (NumElts == 4) {
6761 unsigned PFIndexes[4];
6762 for (unsigned i = 0; i != 4; ++i) {
6763 if (ShuffleMask[i] < 0)
6764 PFIndexes[i] = 8;
6765 else
6766 PFIndexes[i] = ShuffleMask[i];
6767 }
6768
6769 // Compute the index in the perfect shuffle table.
6770 unsigned PFTableIndex =
6771 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6772 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6773 unsigned Cost = (PFEntry >> 30);
6774
6775 if (Cost <= 4)
6776 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6777 }
6778
6779 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
6780 if (EltSize >= 32) {
6781 // Do the expansion with floating-point types, since that is what the VFP
6782 // registers are defined to use, and since i64 is not legal.
6783 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6784 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6785 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
6786 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
6787 SmallVector<SDValue, 8> Ops;
6788 for (unsigned i = 0; i < NumElts; ++i) {
6789 if (ShuffleMask[i] < 0)
6790 Ops.push_back(DAG.getUNDEF(EltVT));
6791 else
6792 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
6793 ShuffleMask[i] < (int)NumElts ? V1 : V2,
6794 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
6795 dl, MVT::i32)));
6796 }
6797 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6798 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6799 }
6800
6801 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
6802 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
6803
6804 if (VT == MVT::v8i8)
6805 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
6806 return NewOp;
6807
6808 return SDValue();
6809}
6810
6811static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6812 // INSERT_VECTOR_ELT is legal only for immediate indexes.
6813 SDValue Lane = Op.getOperand(2);
6814 if (!isa<ConstantSDNode>(Lane))
6815 return SDValue();
6816
6817 return Op;
6818}
6819
6820static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6821 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
6822 SDValue Lane = Op.getOperand(1);
6823 if (!isa<ConstantSDNode>(Lane))
6824 return SDValue();
6825
6826 SDValue Vec = Op.getOperand(0);
6827 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
6828 SDLoc dl(Op);
6829 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
6830 }
6831
6832 return Op;
6833}
6834
6835static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
6836 // The only time a CONCAT_VECTORS operation can have legal types is when
6837 // two 64-bit vectors are concatenated to a 128-bit vector.
6838 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
6839 "unexpected CONCAT_VECTORS");
6840 SDLoc dl(Op);
6841 SDValue Val = DAG.getUNDEF(MVT::v2f64);
6842 SDValue Op0 = Op.getOperand(0);
6843 SDValue Op1 = Op.getOperand(1);
6844 if (!Op0.isUndef())
6845 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6846 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
6847 DAG.getIntPtrConstant(0, dl));
6848 if (!Op1.isUndef())
6849 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6850 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
6851 DAG.getIntPtrConstant(1, dl));
6852 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
6853}
6854
6855/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
6856/// element has been zero/sign-extended, depending on the isSigned parameter,
6857/// from an integer type half its size.
6858static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
6859 bool isSigned) {
6860 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
6861 EVT VT = N->getValueType(0);
6862 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
6863 SDNode *BVN = N->getOperand(0).getNode();
6864 if (BVN->getValueType(0) != MVT::v4i32 ||
6865 BVN->getOpcode() != ISD::BUILD_VECTOR)
6866 return false;
6867 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6868 unsigned HiElt = 1 - LoElt;
6869 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
6870 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
6871 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
6872 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
6873 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
6874 return false;
6875 if (isSigned) {
6876 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
6877 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
6878 return true;
6879 } else {
6880 if (Hi0->isNullValue() && Hi1->isNullValue())
6881 return true;
6882 }
6883 return false;
6884 }
6885
6886 if (N->getOpcode() != ISD::BUILD_VECTOR)
6887 return false;
6888
6889 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
6890 SDNode *Elt = N->getOperand(i).getNode();
6891 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
6892 unsigned EltSize = VT.getScalarSizeInBits();
6893 unsigned HalfSize = EltSize / 2;
6894 if (isSigned) {
6895 if (!isIntN(HalfSize, C->getSExtValue()))
6896 return false;
6897 } else {
6898 if (!isUIntN(HalfSize, C->getZExtValue()))
6899 return false;
6900 }
6901 continue;
6902 }
6903 return false;
6904 }
6905
6906 return true;
6907}
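// Worked example for the constant path above, assuming a v4i16 BUILD_VECTOR:
// HalfSize is 8, so <100, -3, 50, 7> passes the signed check (each value fits
// isIntN(8, ...)), whereas an element of 200 would fail because it needs nine
// bits as a signed value.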
6908
6909/// isSignExtended - Check if a node is a vector value that is sign-extended
6910/// or a constant BUILD_VECTOR with sign-extended elements.
6911static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
6912 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
6913 return true;
6914 if (isExtendedBUILD_VECTOR(N, DAG, true))
6915 return true;
6916 return false;
6917}
6918
6919/// isZeroExtended - Check if a node is a vector value that is zero-extended
6920/// or a constant BUILD_VECTOR with zero-extended elements.
6921static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
6922 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
6923 return true;
6924 if (isExtendedBUILD_VECTOR(N, DAG, false))
6925 return true;
6926 return false;
6927}
6928
6929static EVT getExtensionTo64Bits(const EVT &OrigVT) {
6930 if (OrigVT.getSizeInBits() >= 64)
6931 return OrigVT;
6932
6933 assert(OrigVT.isSimple() && "Expecting a simple value type");
6934
6935 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
6936 switch (OrigSimpleTy) {
6937 default: llvm_unreachable("Unexpected Vector Type");
6938 case MVT::v2i8:
6939 case MVT::v2i16:
6940 return MVT::v2i32;
6941 case MVT::v4i8:
6942 return MVT::v4i16;
6943 }
6944}
6945
6946/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
6947/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
6948/// We insert the required extension here to get the vector to fill a D register.
6949static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
6950 const EVT &OrigTy,
6951 const EVT &ExtTy,
6952 unsigned ExtOpcode) {
6953 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
6954 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
6955 // 64-bits we need to insert a new extension so that it will be 64-bits.
6956 assert(ExtTy.is128BitVector() && "Unexpected extension size");
6957 if (OrigTy.getSizeInBits() >= 64)
6958 return N;
6959
6960 // Must extend size to at least 64 bits to be used as an operand for VMULL.
6961 EVT NewVT = getExtensionTo64Bits(OrigTy);
6962
6963 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
6964}
6965
6966/// SkipLoadExtensionForVMULL - return a load of the original vector size that
6967/// does not do any sign/zero extension. If the original vector is less
6968/// than 64 bits, an appropriate extension will be added after the load to
6969/// reach a total size of 64 bits. We have to add the extension separately
6970/// because ARM does not have a sign/zero extending load for vectors.
6971static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
6972 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
6973
6974 // The load already has the right type.
6975 if (ExtendedTy == LD->getMemoryVT())
6976 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
6977 LD->getBasePtr(), LD->getPointerInfo(),
6978 LD->getAlignment(), LD->getMemOperand()->getFlags());
6979
6980 // We need to create a zextload/sextload. We cannot just create a load
6981 // followed by a sext/zext node because LowerMUL is also run during normal
6982 // operation legalization where we can't create illegal types.
6983 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
6984 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
6985 LD->getMemoryVT(), LD->getAlignment(),
6986 LD->getMemOperand()->getFlags());
6987}
6988
6989/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
6990/// extending load, or BUILD_VECTOR with extended elements, return the
6991/// unextended value. The unextended vector should be 64 bits so that it can
6992/// be used as an operand to a VMULL instruction. If the original vector size
6993 /// before extension is less than 64 bits we add an extension to resize
6994/// the vector to 64 bits.
6995static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
6996 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
6997 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
6998 N->getOperand(0)->getValueType(0),
6999 N->getValueType(0),
7000 N->getOpcode());
7001
7002 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
7003 assert((ISD::isSEXTLoad(LD) || ISD::isZEXTLoad(LD)) &&
7004 "Expected extending load");
7005
7006 SDValue newLoad = SkipLoadExtensionForVMULL(LD, DAG);
7007 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), newLoad.getValue(1));
7008 unsigned Opcode = ISD::isSEXTLoad(LD) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
7009 SDValue extLoad =
7010 DAG.getNode(Opcode, SDLoc(newLoad), LD->getValueType(0), newLoad);
7011 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 0), extLoad);
7012
7013 return newLoad;
7014 }
7015
7016 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
7017 // have been legalized as a BITCAST from v4i32.
7018 if (N->getOpcode() == ISD::BITCAST) {
7019 SDNode *BVN = N->getOperand(0).getNode();
7020 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
7021 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7022 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7023 return DAG.getBuildVector(
7024 MVT::v2i32, SDLoc(N),
7025 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7026 }
7027 // Construct a new BUILD_VECTOR with elements truncated to half the size.
7028 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7029 EVT VT = N->getValueType(0);
7030 unsigned EltSize = VT.getScalarSizeInBits() / 2;
7031 unsigned NumElts = VT.getVectorNumElements();
7032 MVT TruncVT = MVT::getIntegerVT(EltSize);
7033 SmallVector<SDValue, 8> Ops;
7034 SDLoc dl(N);
7035 for (unsigned i = 0; i != NumElts; ++i) {
7036 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7037 const APInt &CInt = C->getAPIntValue();
7038 // Element types smaller than 32 bits are not legal, so use i32 elements.
7039 // The values are implicitly truncated so sext vs. zext doesn't matter.
7040 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7041 }
7042 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7043}
7044
7045static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7046 unsigned Opcode = N->getOpcode();
7047 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7048 SDNode *N0 = N->getOperand(0).getNode();
7049 SDNode *N1 = N->getOperand(1).getNode();
7050 return N0->hasOneUse() && N1->hasOneUse() &&
7051 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7052 }
7053 return false;
7054}
7055
7056static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7057 unsigned Opcode = N->getOpcode();
7058 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7059 SDNode *N0 = N->getOperand(0).getNode();
7060 SDNode *N1 = N->getOperand(1).getNode();
7061 return N0->hasOneUse() && N1->hasOneUse() &&
7062 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7063 }
7064 return false;
7065}
7066
7067static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7068 // Multiplications are only custom-lowered for 128-bit vectors so that
7069 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
7070 EVT VT = Op.getValueType();
7071 assert(VT.is128BitVector() && VT.isInteger() &&
7072 "unexpected type for custom-lowering ISD::MUL");
7073 SDNode *N0 = Op.getOperand(0).getNode();
7074 SDNode *N1 = Op.getOperand(1).getNode();
7075 unsigned NewOpc = 0;
7076 bool isMLA = false;
7077 bool isN0SExt = isSignExtended(N0, DAG);
7078 bool isN1SExt = isSignExtended(N1, DAG);
7079 if (isN0SExt && isN1SExt)
7080 NewOpc = ARMISD::VMULLs;
7081 else {
7082 bool isN0ZExt = isZeroExtended(N0, DAG);
7083 bool isN1ZExt = isZeroExtended(N1, DAG);
7084 if (isN0ZExt && isN1ZExt)
7085 NewOpc = ARMISD::VMULLu;
7086 else if (isN1SExt || isN1ZExt) {
7087 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7088 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7089 if (isN1SExt && isAddSubSExt(N0, DAG)) {
7090 NewOpc = ARMISD::VMULLs;
7091 isMLA = true;
7092 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7093 NewOpc = ARMISD::VMULLu;
7094 isMLA = true;
7095 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7096 std::swap(N0, N1);
7097 NewOpc = ARMISD::VMULLu;
7098 isMLA = true;
7099 }
7100 }
7101
7102 if (!NewOpc) {
7103 if (VT == MVT::v2i64)
7104 // Fall through to expand this. It is not legal.
7105 return SDValue();
7106 else
7107 // Other vector multiplications are legal.
7108 return Op;
7109 }
7110 }
7111
7112 // Legalize to a VMULL instruction.
7113 SDLoc DL(Op);
7114 SDValue Op0;
7115 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7116 if (!isMLA) {
7117 Op0 = SkipExtensionForVMULL(N0, DAG);
7118 assert(Op0.getValueType().is64BitVector() &&
7119 Op1.getValueType().is64BitVector() &&
7120 "unexpected types for extended operands to VMULL");
7121 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7122 }
7123
7124 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
7125 // isel lowering to take advantage of no-stall back to back vmul + vmla.
7126 // vmull q0, d4, d6
7127 // vmlal q0, d5, d6
7128 // is faster than
7129 // vaddl q0, d4, d5
7130 // vmovl q1, d6
7131 // vmul q0, q0, q1
7132 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7133 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7134 EVT Op1VT = Op1.getValueType();
7135 return DAG.getNode(N0->getOpcode(), DL, VT,
7136 DAG.getNode(NewOpc, DL, VT,
7137 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7138 DAG.getNode(NewOpc, DL, VT,
7139 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7140}
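// The isMLA path above distributes the widening multiply, lowering
//   (zext A + zext B) * zext C
// as
//   VMULLu(A, C) + VMULLu(B, C)
// (likewise for sext/VMULLs and for SUB), which matches the vmull/vmlal
// sequence shown in the comment instead of widening the addition first.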
7141
7142static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7143 SelectionDAG &DAG) {
7144 // TODO: Should this propagate fast-math-flags?
7145
7146 // Convert to float
7147 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7148 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7149 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7150 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7151 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7152 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7153 // Get reciprocal estimate.
7154 // float4 recip = vrecpeq_f32(yf);
7155 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7156 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7157 Y);
7158 // Because char has a smaller range than uchar, we can actually get away
7159 // without any newton steps. This requires that we use a weird bias
7160 // of 0xb000, however (again, this has been exhaustively tested).
7161 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7162 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7163 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7164 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7165 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7166 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7167 // Convert back to short.
7168 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7169 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7170 return X;
7171}
7172
7173static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7174 SelectionDAG &DAG) {
7175 // TODO: Should this propagate fast-math-flags?
7176
7177 SDValue N2;
7178 // Convert to float.
7179 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7180 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7181 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7182 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7183 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7184 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7185
7186 // Use reciprocal estimate and one refinement step.
7187 // float4 recip = vrecpeq_f32(yf);
7188 // recip *= vrecpsq_f32(yf, recip);
7189 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7190 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7191 N1);
7192 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7193 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7194 N1, N2);
7195 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7196 // Because short has a smaller range than ushort, we can actually get away
7197 // with only a single newton step. This requires that we use a weird bias
7198 // of 89, however (again, this has been exhaustively tested).
7199 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7200 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7201 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7202 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7203 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7204 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7205 // Convert back to integer and return.
7206 // return vmovn_s32(vcvt_s32_f32(result));
7207 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7208 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7209 return N0;
7210}
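// For reference, the refinement above relies on the NEON VRECPS semantics
// result = 2 - a*b, so a single Newton-Raphson step is
//   recip' = recip * (2 - y * recip)
// which roughly doubles the number of correct bits in the 1/y estimate;
// LowerUDIV below applies two such steps before the final multiply.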
7211
7212static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7213 EVT VT = Op.getValueType();
7214 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7215 "unexpected type for custom-lowering ISD::SDIV");
7216
7217 SDLoc dl(Op);
7218 SDValue N0 = Op.getOperand(0);
7219 SDValue N1 = Op.getOperand(1);
7220 SDValue N2, N3;
7221
7222 if (VT == MVT::v8i8) {
7223 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7224 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7225
7226 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7227 DAG.getIntPtrConstant(4, dl));
7228 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7229 DAG.getIntPtrConstant(4, dl));
7230 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7231 DAG.getIntPtrConstant(0, dl));
7232 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7233 DAG.getIntPtrConstant(0, dl));
7234
7235 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7236 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7237
7238 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7239 N0 = LowerCONCAT_VECTORS(N0, DAG);
7240
7241 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7242 return N0;
7243 }
7244 return LowerSDIV_v4i16(N0, N1, dl, DAG);
7245}
7246
7247static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7248 // TODO: Should this propagate fast-math-flags?
7249 EVT VT = Op.getValueType();
7250 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7251 "unexpected type for custom-lowering ISD::UDIV");
7252
7253 SDLoc dl(Op);
7254 SDValue N0 = Op.getOperand(0);
7255 SDValue N1 = Op.getOperand(1);
7256 SDValue N2, N3;
7257
7258 if (VT == MVT::v8i8) {
7259 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7260 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7261
7262 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7263 DAG.getIntPtrConstant(4, dl));
7264 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7265 DAG.getIntPtrConstant(4, dl));
7266 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7267 DAG.getIntPtrConstant(0, dl));
7268 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7269 DAG.getIntPtrConstant(0, dl));
7270
7271 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7272 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7273
7274 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7275 N0 = LowerCONCAT_VECTORS(N0, DAG);
7276
7277 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7278 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7279 MVT::i32),
7280 N0);
7281 return N0;
7282 }
7283
7284 // v4i16 sdiv ... Convert to float.
7285 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7286 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7287 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7288 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7289 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7290 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7291
7292 // Use reciprocal estimate and two refinement steps.
7293 // float4 recip = vrecpeq_f32(yf);
7294 // recip *= vrecpsq_f32(yf, recip);
7295 // recip *= vrecpsq_f32(yf, recip);
7296 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7297 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7298 BN1);
7299 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7300 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7301 BN1, N2);
7302 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7303 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7304 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7305 BN1, N2);
7306 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7307 // Simply multiplying by the reciprocal estimate can leave us a few ulps
7308 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7309 // and that it will never cause us to return an answer too large).
7310 // float4 result = as_float4(as_int4(xf*recip) + 2);
7311 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7312 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7313 N1 = DAG.getConstant(2, dl, MVT::v4i32);
7314 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7315 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7316 // Convert back to integer and return.
7317 // return vmovn_u32(vcvt_s32_f32(result));
7318 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7319 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7320 return N0;
7321}
7322
7323static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
7324 EVT VT = Op.getNode()->getValueType(0);
7325 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7326
7327 unsigned Opc;
7328 bool ExtraOp = false;
7329 switch (Op.getOpcode()) {
7330 default: llvm_unreachable("Invalid code");
7331 case ISD::ADDC: Opc = ARMISD::ADDC; break;
7332 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
7333 case ISD::SUBC: Opc = ARMISD::SUBC; break;
7334 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
7335 }
7336
7337 if (!ExtraOp)
7338 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7339 Op.getOperand(1));
7340 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7341 Op.getOperand(1), Op.getOperand(2));
7342}
7343
7344SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7345 assert(Subtarget->isTargetDarwin());
7346
7347 // For iOS, we want to call an alternative entry point: __sincos_stret,
7348 // return values are passed via sret.
7349 SDLoc dl(Op);
7350 SDValue Arg = Op.getOperand(0);
7351 EVT ArgVT = Arg.getValueType();
7352 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7353 auto PtrVT = getPointerTy(DAG.getDataLayout());
7354
7355 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7356 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7357
7358 // Pair of floats / doubles used to pass the result.
7359 Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
7360 auto &DL = DAG.getDataLayout();
7361
7362 ArgListTy Args;
7363 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7364 SDValue SRet;
7365 if (ShouldUseSRet) {
7366 // Create stack object for sret.
7367 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7368 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7369 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7370 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7371
7372 ArgListEntry Entry;
7373 Entry.Node = SRet;
7374 Entry.Ty = RetTy->getPointerTo();
7375 Entry.IsSExt = false;
7376 Entry.IsZExt = false;
7377 Entry.IsSRet = true;
7378 Args.push_back(Entry);
7379 RetTy = Type::getVoidTy(*DAG.getContext());
7380 }
7381
7382 ArgListEntry Entry;
7383 Entry.Node = Arg;
7384 Entry.Ty = ArgTy;
7385 Entry.IsSExt = false;
7386 Entry.IsZExt = false;
7387 Args.push_back(Entry);
7388
7389 const char *LibcallName =
7390 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
7391 RTLIB::Libcall LC =
7392 (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
7393 CallingConv::ID CC = getLibcallCallingConv(LC);
7394 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7395
7396 TargetLowering::CallLoweringInfo CLI(DAG);
7397 CLI.setDebugLoc(dl)
7398 .setChain(DAG.getEntryNode())
7399 .setCallee(CC, RetTy, Callee, std::move(Args))
7400 .setDiscardResult(ShouldUseSRet);
7401 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7402
7403 if (!ShouldUseSRet)
7404 return CallResult.first;
7405
7406 SDValue LoadSin =
7407 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7408
7409 // Address of cos field.
7410 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7411 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7412 SDValue LoadCos =
7413 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7414
7415 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7416 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7417 LoadSin.getValue(0), LoadCos.getValue(0));
7418}
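// With the sret path above, __sincos_stret writes a struct { T sin; T cos; }
// into the stack slot, which is read back as two loads: sin from offset 0 and
// cos from offset ArgVT.getStoreSize().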
7419
7420SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7421 bool Signed,
7422 SDValue &Chain) const {
7423 EVT VT = Op.getValueType();
7424 assert((VT == MVT::i32 || VT == MVT::i64) &&
7425 "unexpected type for custom lowering DIV");
7426 SDLoc dl(Op);
7427
7428 const auto &DL = DAG.getDataLayout();
7429 const auto &TLI = DAG.getTargetLoweringInfo();
7430
7431 const char *Name = nullptr;
7432 if (Signed)
7433 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7434 else
7435 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7436
7437 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7438
7439 ARMTargetLowering::ArgListTy Args;
7440
7441 for (auto AI : {1, 0}) {
7442 ArgListEntry Arg;
7443 Arg.Node = Op.getOperand(AI);
7444 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7445 Args.push_back(Arg);
7446 }
7447
7448 CallLoweringInfo CLI(DAG);
7449 CLI.setDebugLoc(dl)
7450 .setChain(Chain)
7451 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7452 ES, std::move(Args));
7453
7454 return LowerCallTo(CLI).first;
7455}
7456
7457SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7458 bool Signed) const {
7459 assert(Op.getValueType() == MVT::i32 &&
7460 "unexpected type for custom lowering DIV");
7461 SDLoc dl(Op);
7462
7463 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7464 DAG.getEntryNode(), Op.getOperand(1));
7465
7466 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7467}
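// Note (annotation, hedged): WIN__DBZCHK threads an explicit divide-by-zero
// check on the denominator (Op.getOperand(1)) into the chain ahead of the
// helper call, so a zero divisor traps the way Windows expects rather than
// relying on the behaviour of the call itself.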
7468
7469static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7470 SDLoc DL(N);
7471 SDValue Op = N->getOperand(1);
7472 if (N->getValueType(0) == MVT::i32)
7473 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7474 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7475 DAG.getConstant(0, DL, MVT::i32));
7476 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7477 DAG.getConstant(1, DL, MVT::i32));
7478 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7479 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7480}
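// Note (annotation): for an i64 division the denominator is split into its
// two i32 halves and the halves are ORed together, so a single 32-bit
// WIN__DBZCHK covers the whole 64-bit value (the OR is zero iff the i64
// denominator is zero).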
7481
7482void ARMTargetLowering::ExpandDIV_Windows(
7483 SDValue Op, SelectionDAG &DAG, bool Signed,
7484 SmallVectorImpl<SDValue> &Results) const {
7485 const auto &DL = DAG.getDataLayout();
7486 const auto &TLI = DAG.getTargetLoweringInfo();
7487
7488 assert(Op.getValueType() == MVT::i64 &&
7489        "unexpected type for custom lowering DIV");
7490 SDLoc dl(Op);
7491
7492 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7493
7494 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7495
7496 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7497 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7498 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7499 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7500
7501 Results.push_back(Lower);
7502 Results.push_back(Upper);
7503}
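// Note (annotation): i64 is not a legal type here, so the 64-bit quotient
// returned by the helper call is handed back to the legalizer as two i32
// values: the low word via TRUNCATE and the high word via SRL-by-32 followed
// by TRUNCATE.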
7504
7505static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7506 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7507 // Acquire/Release load/store is not legal for targets without a dmb or
7508 // equivalent available.
7509 return SDValue();
7510
7511 // Monotonic load/store is legal for all targets.
7512 return Op;
7513}
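// Note (annotation): a monotonic (relaxed) atomic load or store needs no
// barrier and can be selected as an ordinary load/store, so the node is
// returned unchanged ("legal"). Anything stronger returns an empty SDValue,
// letting the generic expansion insert the appropriate fences.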
7514
7515static void ReplaceREADCYCLECOUNTER(SDNode *N,
7516 SmallVectorImpl<SDValue> &Results,
7517 SelectionDAG &DAG,
7518 const ARMSubtarget *Subtarget) {
7519 SDLoc DL(N);
7520 // Under Power Management extensions, the cycle-count is:
7521 // mrc p15, #0, <Rt>, c9, c13, #0
7522 SDValue Ops[] = { N->getOperand(0), // Chain
7523 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7524 DAG.getConstant(15, DL, MVT::i32),
7525 DAG.getConstant(0, DL, MVT::i32),
7526 DAG.getConstant(9, DL, MVT::i32),
7527 DAG.getConstant(13, DL, MVT::i32),
7528 DAG.getConstant(0, DL, MVT::i32)
7529 };
7530
7531 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7532 DAG.getVTList(MVT::i32, MVT::Other), Ops);
7533 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7534 DAG.getConstant(0, DL, MVT::i32)));
7535 Results.push_back(Cycles32.getValue(1));
7536}
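// Note (annotation): the intrinsic operands above encode
//   mrc p15, #0, <Rt>, c9, c13, #0
// i.e. coprocessor 15, opc1 = 0, CRn = c9, CRm = c13, opc2 = 0, which reads
// the 32-bit cycle counter; the value is widened to the i64 the node expects
// by pairing it with a zero high word via BUILD_PAIR.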
7537
7538static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7539 SDLoc dl(V.getNode());
7540 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7541 SDValue VHi = DAG.getAnyExtOrTrunc(
7542 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7543 dl, MVT::i32);
7544 SDValue RegClass =
7545 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7546 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7547 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7548 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7549 return SDValue(
7550 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7551}
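// Note (annotation): REG_SEQUENCE here glues the low and high i32 halves of
// a 64-bit value into a single GPRPair virtual register (gsub_0 = low half,
// gsub_1 = high half), which is the operand form the 64-bit compare-and-swap
// pseudo below requires.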
7552
7553static void ReplaceCMP_SWAP_64Results(SDNode *N,
7554 SmallVectorImpl<SDValue> & Results,
7555 SelectionDAG &DAG) {
7556 assert(N->getValueType(0) == MVT::i64 &&
7557        "AtomicCmpSwap on types less than 64 should be legal");
7558 SDValue Ops[] = {N->getOperand(1),
7559 createGPRPairNode(DAG, N->getOperand(2)),
7560 createGPRPairNode(DAG, N->getOperand(3)),
7561 N->getOperand(0)};
7562 SDNode *CmpSwap = DAG.getMachineNode(
7563 ARM::CMP_SWAP_64, SDLoc(N),
7564 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7565
7566 MachineFunction &MF = DAG.getMachineFunction();
7567 MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7568 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7569 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7570
7571 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
7572 SDValue(CmpSwap, 0)));
7573 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
7574 SDValue(CmpSwap, 0)));
7575 Results.push_back(SDValue(CmpSwap, 2));
7576}
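// Note (annotation, hedged): CMP_SWAP_64 is a pseudo-instruction that is
// expanded after selection (typically into an exclusive load/store loop,
// e.g. LDREXD/STREXD). The original memory operand is re-attached so alias
// information survives, and the i64 result is handed back to the legalizer
// as its gsub_0/gsub_1 halves plus the output chain.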
7577
7578static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7579 SelectionDAG &DAG) {
7580 const auto &TLI = DAG.getTargetLoweringInfo();
7581
7582 assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7583        "Custom lowering is MSVCRT specific!");
7584
7585 SDLoc dl(Op);
7586 SDValue Val = Op.getOperand(0);
7587 MVT Ty = Val->getSimpleValueType(0);
7588 SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7589 SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7590 TLI.getPointerTy(DAG.getDataLayout()));
7591
7592 TargetLowering::ArgListTy Args;
7593 TargetLowering::ArgListEntry Entry;
7594
7595 Entry.Node = Val;
7596 Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7597 Entry.IsZExt = true;
7598 Args.push_back(Entry);
7599
7600 Entry.Node = Exponent;
7601 Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7602 Entry.IsZExt = true;
7603 Args.push_back(Entry);
7604
7605 Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
7606
7607 // The in-chain to the call is the entry node. If we are emitting a
7608 // tail call, the chain will be mutated if the node has a non-entry input
7609 // chain.
7610 SDValue InChain = DAG.getEntryNode();
7611 SDValue TCChain = InChain;
7612
7613 const auto *F = DAG.getMachineFunction().getFunction();
7614 bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
7615 F->getReturnType() == LCRTy;
7616 if (IsTC)
7617 InChain = TCChain;
7618
7619 TargetLowering::CallLoweringInfo CLI(DAG);
7620 CLI.setDebugLoc(dl)
7621 .setChain(InChain)
7622 .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
7623 .setTailCall(IsTC);
7624 std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
7625
7626 // Return the chain (the DAG root) if it is a tail call
7627 return !CI.second.getNode() ? DAG.getRoot() : CI.first;
7628}
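// Note (annotation, hedged): MSVCRT provides no powi-style primitive, so
// FPOWI(Val, N) is lowered as a plain pow/powf libcall with the integer
// exponent converted via SINT_TO_FP, roughly
//
//   powf(Val, (float)N)   // f32
//   pow (Val, (double)N)  // f64
//
// When the node sits in tail-call position and the function returns the
// same type, the call is emitted as a tail call and the DAG root (the
// chain) is returned instead of the call result.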
7629
7630SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7631 switch (Op.getOpcode()) {
7632 default: llvm_unreachable("Don't know how to custom lower this!");
7633 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
7634 case ISD::ConstantPool:
7635 if (Subtarget->genExecuteOnly())
7636       llvm_unreachable("execute-only should not generate constant pools");
7637 return LowerConstantPool(Op, DAG);
7638 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7639 case ISD::GlobalAddress:
7640 switch (Subtarget->getTargetTriple().getObjectFormat()) {
7641     default: llvm_unreachable("unknown object format");
7642 case Triple::COFF:
7643 return LowerGlobalAddressWindows(Op, DAG);
7644 case Triple::ELF:
7645 return LowerGlobalAddressELF(Op, DAG);
7646 case Triple::MachO:
7647 return LowerGlobalAddressDarwin(Op, DAG);
7648 }
7649 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7650 case ISD::SELECT: return LowerSELECT(Op, DAG);
7651 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7652 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
7653 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
7654 case ISD::VASTART: return LowerVASTART(Op, DAG);
7655 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7656 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
7657 case ISD::SINT_TO_FP:
7658 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7659 case ISD::FP_TO_SINT:
7660 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
7661 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
7662 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7663 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7664 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
7665 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
7666 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
7667 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
7668 Subtarget);
7669 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
7670 case ISD::SHL:
7671 case ISD::SRL:
7672 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
7673 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
7674 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
7675 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
7676 case ISD::SRL_PARTS:
7677 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
7678 case ISD::CTTZ:
7679 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
7680 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
7681 case ISD::SETCC: return LowerVSETCC(Op, DAG);
7682 case ISD::SETCCE: return LowerSETCCE(Op, DAG);
7683 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
7684 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
7685 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
7686 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
7687 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7688 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
7689 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
7690 case ISD::MUL: return LowerMUL(Op, DAG);
7691 case ISD::SDIV:
7692 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7693 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
7694 return LowerSDIV(Op, DAG);
7695 case ISD::UDIV:
7696 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7697 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
7698 return LowerUDIV(Op, DAG);
7699 case ISD::ADDC:
7700 case ISD::ADDE:
7701 case ISD::SUBC:
7702 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
7703 case ISD::SADDO:
7704 case ISD::UADDO:
7705 case ISD::SSUBO:
7706 case ISD::USUBO:
7707 return LowerXALUO(Op, DAG);
7708 case ISD::ATOMIC_LOAD:
7709 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
7710 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
7711 case ISD::SDIVREM:
7712 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
7713 case ISD::DYNAMIC_STACKALLOC:
7714 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
7715 return LowerDYNAMIC_STACKALLOC(Op, DAG);
7716     llvm_unreachable("Don't know how to custom lower this!");
7717 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
7718 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
7719 case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
7720 case ARMISD::WIN__DBZCHK: return SDValue();
7721 }
7722}
7723
7724/// ReplaceNodeResults - Replace the results of node with an illegal result
7725/// type with new values built out of custom code.
7726void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
7727 SmallVectorImpl<SDValue> &Results,
7728 SelectionDAG &DAG) const {
7729 SDValue Res;
7730 switch (N->getOpcode()) {
7731 default:
7732     llvm_unreachable("Don't know how to custom expand this!");
7733 case ISD::READ_REGISTER:
7734 ExpandREAD_REGISTER(N, Results, DAG);
7735 break;
7736 case ISD::BITCAST:
7737 Res = ExpandBITCAS