Bug Summary

File: lib/Target/ARM/ARMISelLowering.cpp
Warning: line 453, column 18
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp:453:18)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, CC, consider reordering the fields or adding explicit padding members

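A minimal, self-contained C++ sketch of the layout the analyzer is flagging, assuming a typical LP64 host (8-byte pointers, 4-byte enums). Libcall and CallingConvID are stand-ins for RTLIB::Libcall and CallingConv::ID; the struct names and static_asserts are illustrative only and are not part of ARMISelLowering.cpp.

    // Stand-ins for the real LLVM types; assumed to be 4 bytes each,
    // matching the analyzer's 8-padding-byte diagnosis above.
    enum class Libcall : int { Dummy };
    using CallingConvID = unsigned;

    struct AsWritten {          // field order used at line 453: Op, Name, CC
      Libcall Op;               // bytes 0-3
                                // bytes 4-7: padding so Name is 8-byte aligned
      const char *Name;         // bytes 8-15
      CallingConvID CC;         // bytes 16-19
                                // bytes 20-23: tail padding
    };

    struct Reordered {          // order suggested by the analyzer: Name, Op, CC
      const char *Name;         // bytes 0-7
      Libcall Op;               // bytes 8-11
      CallingConvID CC;         // bytes 12-15
    };

    static_assert(sizeof(AsWritten) == 24, "8 padding bytes on an LP64 host");
    static_assert(sizeof(Reordered) == 16, "no padding on an LP64 host");

Reordering the fields as suggested (or adding explicit padding members) saves 8 bytes per entry of the static LibraryCalls table; it shrinks read-only data only and does not change behaviour.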
Annotated Source Code

1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that ARM uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "ARMBaseInstrInfo.h"
16#include "ARMBaseRegisterInfo.h"
17#include "ARMCallingConv.h"
18#include "ARMConstantPoolValue.h"
19#include "ARMISelLowering.h"
20#include "ARMMachineFunctionInfo.h"
21#include "ARMPerfectShuffle.h"
22#include "ARMRegisterInfo.h"
23#include "ARMSelectionDAGInfo.h"
24#include "ARMSubtarget.h"
25#include "MCTargetDesc/ARMAddressingModes.h"
26#include "MCTargetDesc/ARMBaseInfo.h"
27#include "llvm/ADT/APFloat.h"
28#include "llvm/ADT/APInt.h"
29#include "llvm/ADT/ArrayRef.h"
30#include "llvm/ADT/BitVector.h"
31#include "llvm/ADT/DenseMap.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/SmallVector.h"
34#include "llvm/ADT/Statistic.h"
35#include "llvm/ADT/STLExtras.h"
36#include "llvm/ADT/StringExtras.h"
37#include "llvm/ADT/StringSwitch.h"
38#include "llvm/ADT/StringRef.h"
39#include "llvm/ADT/Triple.h"
40#include "llvm/ADT/Twine.h"
41#include "llvm/Analysis/VectorUtils.h"
42#include "llvm/CodeGen/CallingConvLower.h"
43#include "llvm/CodeGen/ISDOpcodes.h"
44#include "llvm/CodeGen/IntrinsicLowering.h"
45#include "llvm/CodeGen/MachineBasicBlock.h"
46#include "llvm/CodeGen/MachineConstantPool.h"
47#include "llvm/CodeGen/MachineFrameInfo.h"
48#include "llvm/CodeGen/MachineFunction.h"
49#include "llvm/CodeGen/MachineInstr.h"
50#include "llvm/CodeGen/MachineInstrBuilder.h"
51#include "llvm/CodeGen/MachineJumpTableInfo.h"
52#include "llvm/CodeGen/MachineMemOperand.h"
53#include "llvm/CodeGen/MachineOperand.h"
54#include "llvm/CodeGen/MachineRegisterInfo.h"
55#include "llvm/CodeGen/MachineValueType.h"
56#include "llvm/CodeGen/RuntimeLibcalls.h"
57#include "llvm/CodeGen/SelectionDAG.h"
58#include "llvm/CodeGen/SelectionDAGNodes.h"
59#include "llvm/CodeGen/ValueTypes.h"
60#include "llvm/IR/Attributes.h"
61#include "llvm/IR/CallingConv.h"
62#include "llvm/IR/Constant.h"
63#include "llvm/IR/Constants.h"
64#include "llvm/IR/Function.h"
65#include "llvm/IR/DataLayout.h"
66#include "llvm/IR/DebugLoc.h"
67#include "llvm/IR/DerivedTypes.h"
68#include "llvm/IR/Function.h"
69#include "llvm/IR/GlobalAlias.h"
70#include "llvm/IR/GlobalValue.h"
71#include "llvm/IR/GlobalVariable.h"
72#include "llvm/IR/IRBuilder.h"
73#include "llvm/IR/InlineAsm.h"
74#include "llvm/IR/Instruction.h"
75#include "llvm/IR/Instructions.h"
76#include "llvm/IR/IntrinsicInst.h"
77#include "llvm/IR/Intrinsics.h"
78#include "llvm/IR/Module.h"
79#include "llvm/IR/Type.h"
80#include "llvm/IR/User.h"
81#include "llvm/IR/Value.h"
82#include "llvm/MC/MCInstrDesc.h"
83#include "llvm/MC/MCInstrItineraries.h"
84#include "llvm/MC/MCRegisterInfo.h"
85#include "llvm/MC/MCSchedule.h"
86#include "llvm/Support/AtomicOrdering.h"
87#include "llvm/Support/BranchProbability.h"
88#include "llvm/Support/Casting.h"
89#include "llvm/Support/CodeGen.h"
90#include "llvm/Support/CommandLine.h"
91#include "llvm/Support/Compiler.h"
92#include "llvm/Support/Debug.h"
93#include "llvm/Support/ErrorHandling.h"
94#include "llvm/Support/MathExtras.h"
95#include "llvm/Support/raw_ostream.h"
96#include "llvm/Target/TargetInstrInfo.h"
97#include "llvm/Target/TargetMachine.h"
98#include "llvm/Target/TargetOptions.h"
99#include <algorithm>
100#include <cassert>
101#include <cstdint>
102#include <cstdlib>
103#include <iterator>
104#include <limits>
105#include <tuple>
106#include <string>
107#include <utility>
108#include <vector>
109
110using namespace llvm;
111
112#define DEBUG_TYPE "arm-isel"
113
114STATISTIC(NumTailCalls, "Number of tail calls");
115STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
116STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
117STATISTIC(NumConstpoolPromoted,
118 "Number of constants with their storage promoted into constant pools");
119
120static cl::opt<bool>
121ARMInterworking("arm-interworking", cl::Hidden,
122 cl::desc("Enable / disable ARM interworking (for debugging only)"),
123 cl::init(true));
124
125static cl::opt<bool> EnableConstpoolPromotion(
126 "arm-promote-constant", cl::Hidden,
127 cl::desc("Enable / disable promotion of unnamed_addr constants into "
128 "constant pools"),
129 cl::init(true));
130static cl::opt<unsigned> ConstpoolPromotionMaxSize(
131 "arm-promote-constant-max-size", cl::Hidden,
132 cl::desc("Maximum size of constant to promote into a constant pool"),
133 cl::init(64));
134static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
135 "arm-promote-constant-max-total", cl::Hidden,
136 cl::desc("Maximum size of ALL constants to promote into a constant pool"),
137 cl::init(128));
138
139// The APCS parameter registers.
140static const MCPhysReg GPRArgRegs[] = {
141 ARM::R0, ARM::R1, ARM::R2, ARM::R3
142};
143
144void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
145 MVT PromotedBitwiseVT) {
146 if (VT != PromotedLdStVT) {
147 setOperationAction(ISD::LOAD, VT, Promote);
148 AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);
149
150 setOperationAction(ISD::STORE, VT, Promote);
151 AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
152 }
153
154 MVT ElemTy = VT.getVectorElementType();
155 if (ElemTy != MVT::f64)
156 setOperationAction(ISD::SETCC, VT, Custom);
157 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
158 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
159 if (ElemTy == MVT::i32) {
160 setOperationAction(ISD::SINT_TO_FP, VT, Custom);
161 setOperationAction(ISD::UINT_TO_FP, VT, Custom);
162 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
163 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
164 } else {
165 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
166 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
167 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
168 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
169 }
170 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
171 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
172 setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
173 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
174 setOperationAction(ISD::SELECT, VT, Expand);
175 setOperationAction(ISD::SELECT_CC, VT, Expand);
176 setOperationAction(ISD::VSELECT, VT, Expand);
177 setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
178 if (VT.isInteger()) {
179 setOperationAction(ISD::SHL, VT, Custom);
180 setOperationAction(ISD::SRA, VT, Custom);
181 setOperationAction(ISD::SRL, VT, Custom);
182 }
183
184 // Promote all bit-wise operations.
185 if (VT.isInteger() && VT != PromotedBitwiseVT) {
186 setOperationAction(ISD::AND, VT, Promote);
187 AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
188 setOperationAction(ISD::OR, VT, Promote);
189 AddPromotedToType (ISD::OR, VT, PromotedBitwiseVT);
190 setOperationAction(ISD::XOR, VT, Promote);
191 AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
192 }
193
194 // Neon does not support vector divide/remainder operations.
195 setOperationAction(ISD::SDIV, VT, Expand);
196 setOperationAction(ISD::UDIV, VT, Expand);
197 setOperationAction(ISD::FDIV, VT, Expand);
198 setOperationAction(ISD::SREM, VT, Expand);
199 setOperationAction(ISD::UREM, VT, Expand);
200 setOperationAction(ISD::FREM, VT, Expand);
201
202 if (!VT.isFloatingPoint() &&
203 VT != MVT::v2i64 && VT != MVT::v1i64)
204 for (unsigned Opcode : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
205 setOperationAction(Opcode, VT, Legal);
206}
207
208void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
209 addRegisterClass(VT, &ARM::DPRRegClass);
210 addTypeForNEON(VT, MVT::f64, MVT::v2i32);
211}
212
213void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
214 addRegisterClass(VT, &ARM::DPairRegClass);
215 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
216}
217
218ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
219 const ARMSubtarget &STI)
220 : TargetLowering(TM), Subtarget(&STI) {
221 RegInfo = Subtarget->getRegisterInfo();
222 Itins = Subtarget->getInstrItineraryData();
223
224 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
225
226 if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetIOS() &&
227 !Subtarget->isTargetWatchOS()) {
228 const auto &E = Subtarget->getTargetTriple().getEnvironment();
229
230 bool IsHFTarget = E == Triple::EABIHF || E == Triple::GNUEABIHF ||
231 E == Triple::MuslEABIHF;
232 // Windows is a special case. Technically, we will replace all of the "GNU"
233 // calls with calls to MSVCRT if appropriate and adjust the calling
234 // convention then.
235 IsHFTarget = IsHFTarget || Subtarget->isTargetWindows();
236
237 for (int LCID = 0; LCID < RTLIB::UNKNOWN_LIBCALL; ++LCID)
238 setLibcallCallingConv(static_cast<RTLIB::Libcall>(LCID),
239 IsHFTarget ? CallingConv::ARM_AAPCS_VFP
240 : CallingConv::ARM_AAPCS);
241 }
242
243 if (Subtarget->isTargetMachO()) {
244 // Uses VFP for Thumb libfuncs if available.
245 if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
246 Subtarget->hasARMOps() && !Subtarget->useSoftFloat()) {
247 static const struct {
248 const RTLIB::Libcall Op;
249 const char * const Name;
250 const ISD::CondCode Cond;
251 } LibraryCalls[] = {
252 // Single-precision floating-point arithmetic.
253 { RTLIB::ADD_F32, "__addsf3vfp", ISD::SETCC_INVALID },
254 { RTLIB::SUB_F32, "__subsf3vfp", ISD::SETCC_INVALID },
255 { RTLIB::MUL_F32, "__mulsf3vfp", ISD::SETCC_INVALID },
256 { RTLIB::DIV_F32, "__divsf3vfp", ISD::SETCC_INVALID },
257
258 // Double-precision floating-point arithmetic.
259 { RTLIB::ADD_F64, "__adddf3vfp", ISD::SETCC_INVALID },
260 { RTLIB::SUB_F64, "__subdf3vfp", ISD::SETCC_INVALID },
261 { RTLIB::MUL_F64, "__muldf3vfp", ISD::SETCC_INVALID },
262 { RTLIB::DIV_F64, "__divdf3vfp", ISD::SETCC_INVALID },
263
264 // Single-precision comparisons.
265 { RTLIB::OEQ_F32, "__eqsf2vfp", ISD::SETNE },
266 { RTLIB::UNE_F32, "__nesf2vfp", ISD::SETNE },
267 { RTLIB::OLT_F32, "__ltsf2vfp", ISD::SETNE },
268 { RTLIB::OLE_F32, "__lesf2vfp", ISD::SETNE },
269 { RTLIB::OGE_F32, "__gesf2vfp", ISD::SETNE },
270 { RTLIB::OGT_F32, "__gtsf2vfp", ISD::SETNE },
271 { RTLIB::UO_F32, "__unordsf2vfp", ISD::SETNE },
272 { RTLIB::O_F32, "__unordsf2vfp", ISD::SETEQ },
273
274 // Double-precision comparisons.
275 { RTLIB::OEQ_F64, "__eqdf2vfp", ISD::SETNE },
276 { RTLIB::UNE_F64, "__nedf2vfp", ISD::SETNE },
277 { RTLIB::OLT_F64, "__ltdf2vfp", ISD::SETNE },
278 { RTLIB::OLE_F64, "__ledf2vfp", ISD::SETNE },
279 { RTLIB::OGE_F64, "__gedf2vfp", ISD::SETNE },
280 { RTLIB::OGT_F64, "__gtdf2vfp", ISD::SETNE },
281 { RTLIB::UO_F64, "__unorddf2vfp", ISD::SETNE },
282 { RTLIB::O_F64, "__unorddf2vfp", ISD::SETEQ },
283
284 // Floating-point to integer conversions.
285 // i64 conversions are done via library routines even when generating VFP
286 // instructions, so use the same ones.
287 { RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp", ISD::SETCC_INVALID },
288 { RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp", ISD::SETCC_INVALID },
289 { RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp", ISD::SETCC_INVALID },
290 { RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp", ISD::SETCC_INVALID },
291
292 // Conversions between floating types.
293 { RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp", ISD::SETCC_INVALID },
294 { RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp", ISD::SETCC_INVALID },
295
296 // Integer to floating-point conversions.
297 // i64 conversions are done via library routines even when generating VFP
298 // instructions, so use the same ones.
299 // FIXME: There appears to be some naming inconsistency in ARM libgcc:
300 // e.g., __floatunsidf vs. __floatunssidfvfp.
301 { RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp", ISD::SETCC_INVALID },
302 { RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp", ISD::SETCC_INVALID },
303 { RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp", ISD::SETCC_INVALID },
304 { RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp", ISD::SETCC_INVALID },
305 };
306
307 for (const auto &LC : LibraryCalls) {
308 setLibcallName(LC.Op, LC.Name);
309 if (LC.Cond != ISD::SETCC_INVALID)
310 setCmpLibcallCC(LC.Op, LC.Cond);
311 }
312 }
313
314 // Set the correct calling convention for ARMv7k WatchOS. It's just
315 // AAPCS_VFP for functions as simple as libcalls.
316 if (Subtarget->isTargetWatchABI()) {
317 for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i)
318 setLibcallCallingConv((RTLIB::Libcall)i, CallingConv::ARM_AAPCS_VFP);
319 }
320 }
321
322 // These libcalls are not available in 32-bit.
323 setLibcallName(RTLIB::SHL_I128, nullptr);
324 setLibcallName(RTLIB::SRL_I128, nullptr);
325 setLibcallName(RTLIB::SRA_I128, nullptr);
326
327 // RTLIB
328 if (Subtarget->isAAPCS_ABI() &&
329 (Subtarget->isTargetAEABI() || Subtarget->isTargetGNUAEABI() ||
330 Subtarget->isTargetMuslAEABI() || Subtarget->isTargetAndroid())) {
331 static const struct {
332 const RTLIB::Libcall Op;
333 const char * const Name;
334 const CallingConv::ID CC;
335 const ISD::CondCode Cond;
336 } LibraryCalls[] = {
337 // Double-precision floating-point arithmetic helper functions
338 // RTABI chapter 4.1.2, Table 2
339 { RTLIB::ADD_F64, "__aeabi_dadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
340 { RTLIB::DIV_F64, "__aeabi_ddiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
341 { RTLIB::MUL_F64, "__aeabi_dmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
342 { RTLIB::SUB_F64, "__aeabi_dsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
343
344 // Double-precision floating-point comparison helper functions
345 // RTABI chapter 4.1.2, Table 3
346 { RTLIB::OEQ_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
347 { RTLIB::UNE_F64, "__aeabi_dcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
348 { RTLIB::OLT_F64, "__aeabi_dcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
349 { RTLIB::OLE_F64, "__aeabi_dcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
350 { RTLIB::OGE_F64, "__aeabi_dcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
351 { RTLIB::OGT_F64, "__aeabi_dcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
352 { RTLIB::UO_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
353 { RTLIB::O_F64, "__aeabi_dcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
354
355 // Single-precision floating-point arithmetic helper functions
356 // RTABI chapter 4.1.2, Table 4
357 { RTLIB::ADD_F32, "__aeabi_fadd", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
358 { RTLIB::DIV_F32, "__aeabi_fdiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
359 { RTLIB::MUL_F32, "__aeabi_fmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
360 { RTLIB::SUB_F32, "__aeabi_fsub", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
361
362 // Single-precision floating-point comparison helper functions
363 // RTABI chapter 4.1.2, Table 5
364 { RTLIB::OEQ_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETNE },
365 { RTLIB::UNE_F32, "__aeabi_fcmpeq", CallingConv::ARM_AAPCS, ISD::SETEQ },
366 { RTLIB::OLT_F32, "__aeabi_fcmplt", CallingConv::ARM_AAPCS, ISD::SETNE },
367 { RTLIB::OLE_F32, "__aeabi_fcmple", CallingConv::ARM_AAPCS, ISD::SETNE },
368 { RTLIB::OGE_F32, "__aeabi_fcmpge", CallingConv::ARM_AAPCS, ISD::SETNE },
369 { RTLIB::OGT_F32, "__aeabi_fcmpgt", CallingConv::ARM_AAPCS, ISD::SETNE },
370 { RTLIB::UO_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETNE },
371 { RTLIB::O_F32, "__aeabi_fcmpun", CallingConv::ARM_AAPCS, ISD::SETEQ },
372
373 // Floating-point to integer conversions.
374 // RTABI chapter 4.1.2, Table 6
375 { RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
376 { RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
377 { RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
378 { RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
379 { RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
380 { RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
381 { RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
382 { RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
383
384 // Conversions between floating types.
385 // RTABI chapter 4.1.2, Table 7
386 { RTLIB::FPROUND_F64_F32, "__aeabi_d2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
387 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
388 { RTLIB::FPEXT_F32_F64, "__aeabi_f2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
389
390 // Integer to floating-point conversions.
391 // RTABI chapter 4.1.2, Table 8
392 { RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
393 { RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
394 { RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
395 { RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
396 { RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
397 { RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
398 { RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
399 { RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
400
401 // Long long helper functions
402 // RTABI chapter 4.2, Table 9
403 { RTLIB::MUL_I64, "__aeabi_lmul", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
404 { RTLIB::SHL_I64, "__aeabi_llsl", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
405 { RTLIB::SRL_I64, "__aeabi_llsr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
406 { RTLIB::SRA_I64, "__aeabi_lasr", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
407
408 // Integer division functions
409 // RTABI chapter 4.3.1
410 { RTLIB::SDIV_I8, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
411 { RTLIB::SDIV_I16, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
412 { RTLIB::SDIV_I32, "__aeabi_idiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
413 { RTLIB::SDIV_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
414 { RTLIB::UDIV_I8, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
415 { RTLIB::UDIV_I16, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
416 { RTLIB::UDIV_I32, "__aeabi_uidiv", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
417 { RTLIB::UDIV_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
418 };
419
420 for (const auto &LC : LibraryCalls) {
421 setLibcallName(LC.Op, LC.Name);
422 setLibcallCallingConv(LC.Op, LC.CC);
423 if (LC.Cond != ISD::SETCC_INVALID)
424 setCmpLibcallCC(LC.Op, LC.Cond);
425 }
426
427 // EABI dependent RTLIB
428 if (TM.Options.EABIVersion == EABI::EABI4 ||
429 TM.Options.EABIVersion == EABI::EABI5) {
430 static const struct {
431 const RTLIB::Libcall Op;
432 const char *const Name;
433 const CallingConv::ID CC;
434 const ISD::CondCode Cond;
435 } MemOpsLibraryCalls[] = {
436 // Memory operations
437 // RTABI chapter 4.3.4
438 { RTLIB::MEMCPY, "__aeabi_memcpy", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
439 { RTLIB::MEMMOVE, "__aeabi_memmove", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
440 { RTLIB::MEMSET, "__aeabi_memset", CallingConv::ARM_AAPCS, ISD::SETCC_INVALID },
441 };
442
443 for (const auto &LC : MemOpsLibraryCalls) {
444 setLibcallName(LC.Op, LC.Name);
445 setLibcallCallingConv(LC.Op, LC.CC);
446 if (LC.Cond != ISD::SETCC_INVALID)
447 setCmpLibcallCC(LC.Op, LC.Cond);
448 }
449 }
450 }
451
452 if (Subtarget->isTargetWindows()) {
453 static const struct {
Excessive padding in 'struct (anonymous at /tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp:453:18)' (8 padding bytes, where 0 is optimal). Optimal fields order: Name, Op, CC, consider reordering the fields or adding explicit padding members
454 const RTLIB::Libcall Op;
455 const char * const Name;
456 const CallingConv::ID CC;
457 } LibraryCalls[] = {
458 { RTLIB::FPTOSINT_F32_I64, "__stoi64", CallingConv::ARM_AAPCS_VFP },
459 { RTLIB::FPTOSINT_F64_I64, "__dtoi64", CallingConv::ARM_AAPCS_VFP },
460 { RTLIB::FPTOUINT_F32_I64, "__stou64", CallingConv::ARM_AAPCS_VFP },
461 { RTLIB::FPTOUINT_F64_I64, "__dtou64", CallingConv::ARM_AAPCS_VFP },
462 { RTLIB::SINTTOFP_I64_F32, "__i64tos", CallingConv::ARM_AAPCS_VFP },
463 { RTLIB::SINTTOFP_I64_F64, "__i64tod", CallingConv::ARM_AAPCS_VFP },
464 { RTLIB::UINTTOFP_I64_F32, "__u64tos", CallingConv::ARM_AAPCS_VFP },
465 { RTLIB::UINTTOFP_I64_F64, "__u64tod", CallingConv::ARM_AAPCS_VFP },
466 };
467
468 for (const auto &LC : LibraryCalls) {
469 setLibcallName(LC.Op, LC.Name);
470 setLibcallCallingConv(LC.Op, LC.CC);
471 }
472 }
473
474 // Use divmod compiler-rt calls for iOS 5.0 and later.
475 if (Subtarget->isTargetWatchOS() ||
476 (Subtarget->isTargetIOS() &&
477 !Subtarget->getTargetTriple().isOSVersionLT(5, 0))) {
478 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
479 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
480 }
481
482 // The half <-> float conversion functions are always soft-float on
483 // non-watchos platforms, but are needed for some targets which use a
484 // hard-float calling convention by default.
485 if (!Subtarget->isTargetWatchABI()) {
486 if (Subtarget->isAAPCS_ABI()) {
487 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_AAPCS);
488 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_AAPCS);
489 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_AAPCS);
490 } else {
491 setLibcallCallingConv(RTLIB::FPROUND_F32_F16, CallingConv::ARM_APCS);
492 setLibcallCallingConv(RTLIB::FPROUND_F64_F16, CallingConv::ARM_APCS);
493 setLibcallCallingConv(RTLIB::FPEXT_F16_F32, CallingConv::ARM_APCS);
494 }
495 }
496
497 // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
498 // a __gnu_ prefix (which is the default).
499 if (Subtarget->isTargetAEABI()) {
500 static const struct {
501 const RTLIB::Libcall Op;
502 const char * const Name;
503 const CallingConv::ID CC;
504 } LibraryCalls[] = {
505 { RTLIB::FPROUND_F32_F16, "__aeabi_f2h", CallingConv::ARM_AAPCS },
506 { RTLIB::FPROUND_F64_F16, "__aeabi_d2h", CallingConv::ARM_AAPCS },
507 { RTLIB::FPEXT_F16_F32, "__aeabi_h2f", CallingConv::ARM_AAPCS },
508 };
509
510 for (const auto &LC : LibraryCalls) {
511 setLibcallName(LC.Op, LC.Name);
512 setLibcallCallingConv(LC.Op, LC.CC);
513 }
514 }
515
516 if (Subtarget->isThumb1Only())
517 addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
518 else
519 addRegisterClass(MVT::i32, &ARM::GPRRegClass);
520
521 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
522 !Subtarget->isThumb1Only()) {
523 addRegisterClass(MVT::f32, &ARM::SPRRegClass);
524 addRegisterClass(MVT::f64, &ARM::DPRRegClass);
525 }
526
527 for (MVT VT : MVT::vector_valuetypes()) {
528 for (MVT InnerVT : MVT::vector_valuetypes()) {
529 setTruncStoreAction(VT, InnerVT, Expand);
530 setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
531 setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
532 setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
533 }
534
535 setOperationAction(ISD::MULHS, VT, Expand);
536 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
537 setOperationAction(ISD::MULHU, VT, Expand);
538 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
539
540 setOperationAction(ISD::BSWAP, VT, Expand);
541 }
542
543 setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
544 setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
545
546 setOperationAction(ISD::READ_REGISTER, MVT::i64, Custom);
547 setOperationAction(ISD::WRITE_REGISTER, MVT::i64, Custom);
548
549 if (Subtarget->hasNEON()) {
550 addDRTypeForNEON(MVT::v2f32);
551 addDRTypeForNEON(MVT::v8i8);
552 addDRTypeForNEON(MVT::v4i16);
553 addDRTypeForNEON(MVT::v2i32);
554 addDRTypeForNEON(MVT::v1i64);
555
556 addQRTypeForNEON(MVT::v4f32);
557 addQRTypeForNEON(MVT::v2f64);
558 addQRTypeForNEON(MVT::v16i8);
559 addQRTypeForNEON(MVT::v8i16);
560 addQRTypeForNEON(MVT::v4i32);
561 addQRTypeForNEON(MVT::v2i64);
562
563 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
564 // neither Neon nor VFP support any arithmetic operations on it.
565 // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
566 // supported for v4f32.
567 setOperationAction(ISD::FADD, MVT::v2f64, Expand);
568 setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
569 setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
570 // FIXME: Code duplication: FDIV and FREM are expanded always, see
571 // ARMTargetLowering::addTypeForNEON method for details.
572 setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
573 setOperationAction(ISD::FREM, MVT::v2f64, Expand);
574 // FIXME: Create unittest.
 575 // In other words, find a case where "copysign" appears in the DAG with
 576 // vector operands.
577 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
578 // FIXME: Code duplication: SETCC has custom operation action, see
579 // ARMTargetLowering::addTypeForNEON method for details.
580 setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
581 // FIXME: Create unittest for FNEG and for FABS.
582 setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
583 setOperationAction(ISD::FABS, MVT::v2f64, Expand);
584 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
585 setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
586 setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
587 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
588 setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
589 setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
590 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
591 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
592 setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
593 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
594 // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
595 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
596 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
597 setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
598 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
599 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
600 setOperationAction(ISD::FMA, MVT::v2f64, Expand);
601
602 setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
603 setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
604 setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
605 setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
606 setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
607 setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
608 setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
609 setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
610 setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
611 setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
612 setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
613 setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
614 setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
615 setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
616 setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);
617
618 // Mark v2f32 intrinsics.
619 setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
620 setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
621 setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
622 setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
623 setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
624 setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
625 setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
626 setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
627 setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
628 setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
629 setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
630 setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
631 setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
632 setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
633 setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);
634
635 // Neon does not support some operations on v1i64 and v2i64 types.
636 setOperationAction(ISD::MUL, MVT::v1i64, Expand);
637 // Custom handling for some quad-vector types to detect VMULL.
638 setOperationAction(ISD::MUL, MVT::v8i16, Custom);
639 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
640 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
641 // Custom handling for some vector types to avoid expensive expansions
642 setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
643 setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
644 setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
645 setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
646 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
 647 // a destination type that is wider than the source, nor does
648 // it have a FP_TO_[SU]INT instruction with a narrower destination than
649 // source.
650 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
651 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
652 setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
653 setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);
654
655 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
656 setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
657
658 // NEON does not have single instruction CTPOP for vectors with element
659 // types wider than 8-bits. However, custom lowering can leverage the
660 // v8i8/v16i8 vcnt instruction.
661 setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
662 setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
663 setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
664 setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);
665 setOperationAction(ISD::CTPOP, MVT::v1i64, Expand);
666 setOperationAction(ISD::CTPOP, MVT::v2i64, Expand);
667
668 setOperationAction(ISD::CTLZ, MVT::v1i64, Expand);
669 setOperationAction(ISD::CTLZ, MVT::v2i64, Expand);
670
671 // NEON does not have single instruction CTTZ for vectors.
672 setOperationAction(ISD::CTTZ, MVT::v8i8, Custom);
673 setOperationAction(ISD::CTTZ, MVT::v4i16, Custom);
674 setOperationAction(ISD::CTTZ, MVT::v2i32, Custom);
675 setOperationAction(ISD::CTTZ, MVT::v1i64, Custom);
676
677 setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
678 setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
679 setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
680 setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
681
682 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i8, Custom);
683 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i16, Custom);
684 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i32, Custom);
685 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v1i64, Custom);
686
687 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v16i8, Custom);
688 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v8i16, Custom);
689 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v4i32, Custom);
690 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::v2i64, Custom);
691
692 // NEON only has FMA instructions as of VFP4.
693 if (!Subtarget->hasVFP4()) {
694 setOperationAction(ISD::FMA, MVT::v2f32, Expand);
695 setOperationAction(ISD::FMA, MVT::v4f32, Expand);
696 }
697
698 setTargetDAGCombine(ISD::INTRINSIC_VOID);
699 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
700 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
701 setTargetDAGCombine(ISD::SHL);
702 setTargetDAGCombine(ISD::SRL);
703 setTargetDAGCombine(ISD::SRA);
704 setTargetDAGCombine(ISD::SIGN_EXTEND);
705 setTargetDAGCombine(ISD::ZERO_EXTEND);
706 setTargetDAGCombine(ISD::ANY_EXTEND);
707 setTargetDAGCombine(ISD::BUILD_VECTOR);
708 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
709 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
710 setTargetDAGCombine(ISD::STORE);
711 setTargetDAGCombine(ISD::FP_TO_SINT);
712 setTargetDAGCombine(ISD::FP_TO_UINT);
713 setTargetDAGCombine(ISD::FDIV);
714 setTargetDAGCombine(ISD::LOAD);
715
716 // It is legal to extload from v4i8 to v4i16 or v4i32.
717 for (MVT Ty : {MVT::v8i8, MVT::v4i8, MVT::v2i8, MVT::v4i16, MVT::v2i16,
718 MVT::v2i32}) {
719 for (MVT VT : MVT::integer_vector_valuetypes()) {
720 setLoadExtAction(ISD::EXTLOAD, VT, Ty, Legal);
721 setLoadExtAction(ISD::ZEXTLOAD, VT, Ty, Legal);
722 setLoadExtAction(ISD::SEXTLOAD, VT, Ty, Legal);
723 }
724 }
725 }
726
727 if (Subtarget->isFPOnlySP()) {
728 // When targeting a floating-point unit with only single-precision
729 // operations, f64 is legal for the few double-precision instructions which
 730 // are present. However, no double-precision operations other than moves,
731 // loads and stores are provided by the hardware.
732 setOperationAction(ISD::FADD, MVT::f64, Expand);
733 setOperationAction(ISD::FSUB, MVT::f64, Expand);
734 setOperationAction(ISD::FMUL, MVT::f64, Expand);
735 setOperationAction(ISD::FMA, MVT::f64, Expand);
736 setOperationAction(ISD::FDIV, MVT::f64, Expand);
737 setOperationAction(ISD::FREM, MVT::f64, Expand);
738 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
739 setOperationAction(ISD::FGETSIGN, MVT::f64, Expand);
740 setOperationAction(ISD::FNEG, MVT::f64, Expand);
741 setOperationAction(ISD::FABS, MVT::f64, Expand);
742 setOperationAction(ISD::FSQRT, MVT::f64, Expand);
743 setOperationAction(ISD::FSIN, MVT::f64, Expand);
744 setOperationAction(ISD::FCOS, MVT::f64, Expand);
745 setOperationAction(ISD::FPOWI, MVT::f64, Expand);
746 setOperationAction(ISD::FPOW, MVT::f64, Expand);
747 setOperationAction(ISD::FLOG, MVT::f64, Expand);
748 setOperationAction(ISD::FLOG2, MVT::f64, Expand);
749 setOperationAction(ISD::FLOG10, MVT::f64, Expand);
750 setOperationAction(ISD::FEXP, MVT::f64, Expand);
751 setOperationAction(ISD::FEXP2, MVT::f64, Expand);
752 setOperationAction(ISD::FCEIL, MVT::f64, Expand);
753 setOperationAction(ISD::FTRUNC, MVT::f64, Expand);
754 setOperationAction(ISD::FRINT, MVT::f64, Expand);
755 setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand);
756 setOperationAction(ISD::FFLOOR, MVT::f64, Expand);
757 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
758 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
759 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
760 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
761 setOperationAction(ISD::FP_TO_SINT, MVT::f64, Custom);
762 setOperationAction(ISD::FP_TO_UINT, MVT::f64, Custom);
763 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
764 setOperationAction(ISD::FP_EXTEND, MVT::f64, Custom);
765 }
766
767 computeRegisterProperties(Subtarget->getRegisterInfo());
768
769 // ARM does not have floating-point extending loads.
770 for (MVT VT : MVT::fp_valuetypes()) {
771 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
772 setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand);
773 }
774
775 // ... or truncating stores
776 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
777 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
778 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
779
780 // ARM does not have i1 sign extending load.
781 for (MVT VT : MVT::integer_valuetypes())
782 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
783
784 // ARM supports all 4 flavors of integer indexed load / store.
785 if (!Subtarget->isThumb1Only()) {
786 for (unsigned im = (unsigned)ISD::PRE_INC;
787 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
788 setIndexedLoadAction(im, MVT::i1, Legal);
789 setIndexedLoadAction(im, MVT::i8, Legal);
790 setIndexedLoadAction(im, MVT::i16, Legal);
791 setIndexedLoadAction(im, MVT::i32, Legal);
792 setIndexedStoreAction(im, MVT::i1, Legal);
793 setIndexedStoreAction(im, MVT::i8, Legal);
794 setIndexedStoreAction(im, MVT::i16, Legal);
795 setIndexedStoreAction(im, MVT::i32, Legal);
796 }
797 } else {
798 // Thumb-1 has limited post-inc load/store support - LDM r0!, {r1}.
799 setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal);
800 setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal);
801 }
802
803 setOperationAction(ISD::SADDO, MVT::i32, Custom);
804 setOperationAction(ISD::UADDO, MVT::i32, Custom);
805 setOperationAction(ISD::SSUBO, MVT::i32, Custom);
806 setOperationAction(ISD::USUBO, MVT::i32, Custom);
807
808 // i64 operation support.
809 setOperationAction(ISD::MUL, MVT::i64, Expand);
810 setOperationAction(ISD::MULHU, MVT::i32, Expand);
811 if (Subtarget->isThumb1Only()) {
812 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
813 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
814 }
815 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
816 || (Subtarget->isThumb2() && !Subtarget->hasDSP()))
817 setOperationAction(ISD::MULHS, MVT::i32, Expand);
818
819 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
820 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
821 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
822 setOperationAction(ISD::SRL, MVT::i64, Custom);
823 setOperationAction(ISD::SRA, MVT::i64, Custom);
824
825 setOperationAction(ISD::ADDC, MVT::i32, Custom);
826 setOperationAction(ISD::ADDE, MVT::i32, Custom);
827 setOperationAction(ISD::SUBC, MVT::i32, Custom);
828 setOperationAction(ISD::SUBE, MVT::i32, Custom);
829
830 if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
831 setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
832
833 // ARM does not have ROTL.
834 setOperationAction(ISD::ROTL, MVT::i32, Expand);
835 for (MVT VT : MVT::vector_valuetypes()) {
836 setOperationAction(ISD::ROTL, VT, Expand);
837 setOperationAction(ISD::ROTR, VT, Expand);
838 }
839 setOperationAction(ISD::CTTZ, MVT::i32, Custom);
840 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
841 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
842 setOperationAction(ISD::CTLZ, MVT::i32, Expand);
843
844 // @llvm.readcyclecounter requires the Performance Monitors extension.
845 // Default to the 0 expansion on unsupported platforms.
846 // FIXME: Technically there are older ARM CPUs that have
847 // implementation-specific ways of obtaining this information.
848 if (Subtarget->hasPerfMon())
849 setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
850
851 // Only ARMv6 has BSWAP.
852 if (!Subtarget->hasV6Ops())
853 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
854
855 bool hasDivide = Subtarget->isThumb() ? Subtarget->hasDivide()
856 : Subtarget->hasDivideInARMMode();
857 if (!hasDivide) {
858 // These are expanded into libcalls if the cpu doesn't have HW divider.
859 setOperationAction(ISD::SDIV, MVT::i32, LibCall);
860 setOperationAction(ISD::UDIV, MVT::i32, LibCall);
861 }
862
863 if (Subtarget->isTargetWindows() && !Subtarget->hasDivide()) {
864 setOperationAction(ISD::SDIV, MVT::i32, Custom);
865 setOperationAction(ISD::UDIV, MVT::i32, Custom);
866
867 setOperationAction(ISD::SDIV, MVT::i64, Custom);
868 setOperationAction(ISD::UDIV, MVT::i64, Custom);
869 }
870
871 setOperationAction(ISD::SREM, MVT::i32, Expand);
872 setOperationAction(ISD::UREM, MVT::i32, Expand);
873
874 // Register based DivRem for AEABI (RTABI 4.2)
875 if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
876 Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
877 Subtarget->isTargetWindows()) {
878 setOperationAction(ISD::SREM, MVT::i64, Custom);
879 setOperationAction(ISD::UREM, MVT::i64, Custom);
880 HasStandaloneRem = false;
881
882 if (Subtarget->isTargetWindows()) {
883 const struct {
884 const RTLIB::Libcall Op;
885 const char * const Name;
886 const CallingConv::ID CC;
887 } LibraryCalls[] = {
888 { RTLIB::SDIVREM_I8, "__rt_sdiv", CallingConv::ARM_AAPCS },
889 { RTLIB::SDIVREM_I16, "__rt_sdiv", CallingConv::ARM_AAPCS },
890 { RTLIB::SDIVREM_I32, "__rt_sdiv", CallingConv::ARM_AAPCS },
891 { RTLIB::SDIVREM_I64, "__rt_sdiv64", CallingConv::ARM_AAPCS },
892
893 { RTLIB::UDIVREM_I8, "__rt_udiv", CallingConv::ARM_AAPCS },
894 { RTLIB::UDIVREM_I16, "__rt_udiv", CallingConv::ARM_AAPCS },
895 { RTLIB::UDIVREM_I32, "__rt_udiv", CallingConv::ARM_AAPCS },
896 { RTLIB::UDIVREM_I64, "__rt_udiv64", CallingConv::ARM_AAPCS },
897 };
898
899 for (const auto &LC : LibraryCalls) {
900 setLibcallName(LC.Op, LC.Name);
901 setLibcallCallingConv(LC.Op, LC.CC);
902 }
903 } else {
904 const struct {
905 const RTLIB::Libcall Op;
906 const char * const Name;
907 const CallingConv::ID CC;
908 } LibraryCalls[] = {
909 { RTLIB::SDIVREM_I8, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
910 { RTLIB::SDIVREM_I16, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
911 { RTLIB::SDIVREM_I32, "__aeabi_idivmod", CallingConv::ARM_AAPCS },
912 { RTLIB::SDIVREM_I64, "__aeabi_ldivmod", CallingConv::ARM_AAPCS },
913
914 { RTLIB::UDIVREM_I8, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
915 { RTLIB::UDIVREM_I16, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
916 { RTLIB::UDIVREM_I32, "__aeabi_uidivmod", CallingConv::ARM_AAPCS },
917 { RTLIB::UDIVREM_I64, "__aeabi_uldivmod", CallingConv::ARM_AAPCS },
918 };
919
920 for (const auto &LC : LibraryCalls) {
921 setLibcallName(LC.Op, LC.Name);
922 setLibcallCallingConv(LC.Op, LC.CC);
923 }
924 }
925
926 setOperationAction(ISD::SDIVREM, MVT::i32, Custom);
927 setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
928 setOperationAction(ISD::SDIVREM, MVT::i64, Custom);
929 setOperationAction(ISD::UDIVREM, MVT::i64, Custom);
930 } else {
931 setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
932 setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
933 }
934
935 if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
936 for (auto &VT : {MVT::f32, MVT::f64})
937 setOperationAction(ISD::FPOWI, VT, Custom);
938
939 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
940 setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
941 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
942 setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
943
944 setOperationAction(ISD::TRAP, MVT::Other, Legal);
945
946 // Use the default implementation.
947 setOperationAction(ISD::VASTART, MVT::Other, Custom);
948 setOperationAction(ISD::VAARG, MVT::Other, Expand);
949 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
950 setOperationAction(ISD::VAEND, MVT::Other, Expand);
951 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
952 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
953
954 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
955 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
956 else
957 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
958
959 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
960 // the default expansion.
961 InsertFencesForAtomic = false;
962 if (Subtarget->hasAnyDataBarrier() &&
963 (!Subtarget->isThumb() || Subtarget->hasV8MBaselineOps())) {
964 // ATOMIC_FENCE needs custom lowering; the others should have been expanded
965 // to ldrex/strex loops already.
966 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
967 if (!Subtarget->isThumb() || !Subtarget->isMClass())
968 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
969
970 // On v8, we have particularly efficient implementations of atomic fences
971 // if they can be combined with nearby atomic loads and stores.
972 if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
973 // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
974 InsertFencesForAtomic = true;
975 }
976 } else {
977 // If there's anything we can use as a barrier, go through custom lowering
978 // for ATOMIC_FENCE.
 979 // If the target has DMB in Thumb, fences can be inserted.
980 if (Subtarget->hasDataBarrier())
981 InsertFencesForAtomic = true;
982
983 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
984 Subtarget->hasAnyDataBarrier() ? Custom : Expand);
985
986 // Set them all for expansion, which will force libcalls.
987 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
988 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
989 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
990 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
991 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
992 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
993 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
994 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
995 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
996 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
997 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
998 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
999 // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
1000 // Unordered/Monotonic case.
1001 if (!InsertFencesForAtomic) {
1002 setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
1003 setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
1004 }
1005 }
1006
1007 setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
1008
1009 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
1010 if (!Subtarget->hasV6Ops()) {
1011 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
1012 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
1013 }
1014 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
1015
1016 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1017 !Subtarget->isThumb1Only()) {
1018 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
1019 // iff target supports vfp2.
1020 setOperationAction(ISD::BITCAST, MVT::i64, Custom);
1021 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
1022 }
1023
1024 // We want to custom lower some of our intrinsics.
1025 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1026 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
1027 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
1028 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
1029 if (Subtarget->useSjLjEH())
1030 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
1031
1032 setOperationAction(ISD::SETCC, MVT::i32, Expand);
1033 setOperationAction(ISD::SETCC, MVT::f32, Expand);
1034 setOperationAction(ISD::SETCC, MVT::f64, Expand);
1035 setOperationAction(ISD::SELECT, MVT::i32, Custom);
1036 setOperationAction(ISD::SELECT, MVT::f32, Custom);
1037 setOperationAction(ISD::SELECT, MVT::f64, Custom);
1038 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
1039 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
1040 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
1041
1042 // Thumb-1 cannot currently select ARMISD::SUBE.
1043 if (!Subtarget->isThumb1Only())
1044 setOperationAction(ISD::SETCCE, MVT::i32, Custom);
1045
1046 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
1047 setOperationAction(ISD::BR_CC, MVT::i32, Custom);
1048 setOperationAction(ISD::BR_CC, MVT::f32, Custom);
1049 setOperationAction(ISD::BR_CC, MVT::f64, Custom);
1050 setOperationAction(ISD::BR_JT, MVT::Other, Custom);
1051
1052 // We don't support sin/cos/fmod/copysign/pow
1053 setOperationAction(ISD::FSIN, MVT::f64, Expand);
1054 setOperationAction(ISD::FSIN, MVT::f32, Expand);
1055 setOperationAction(ISD::FCOS, MVT::f32, Expand);
1056 setOperationAction(ISD::FCOS, MVT::f64, Expand);
1057 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
1058 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
1059 setOperationAction(ISD::FREM, MVT::f64, Expand);
1060 setOperationAction(ISD::FREM, MVT::f32, Expand);
1061 if (!Subtarget->useSoftFloat() && Subtarget->hasVFP2() &&
1062 !Subtarget->isThumb1Only()) {
1063 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
1064 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
1065 }
1066 setOperationAction(ISD::FPOW, MVT::f64, Expand);
1067 setOperationAction(ISD::FPOW, MVT::f32, Expand);
1068
1069 if (!Subtarget->hasVFP4()) {
1070 setOperationAction(ISD::FMA, MVT::f64, Expand);
1071 setOperationAction(ISD::FMA, MVT::f32, Expand);
1072 }
1073
1074 // Various VFP goodness
1075 if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only()) {
1076 // FP-ARMv8 adds f64 <-> f16 conversion. Before that it should be expanded.
1077 if (!Subtarget->hasFPARMv8() || Subtarget->isFPOnlySP()) {
1078 setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
1079 setOperationAction(ISD::FP_TO_FP16, MVT::f64, Expand);
1080 }
1081
1082 // fp16 is a special v7 extension that adds f16 <-> f32 conversions.
1083 if (!Subtarget->hasFP16()) {
1084 setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
1085 setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
1086 }
1087 }
1088
1089 // Combine sin / cos into one node or libcall if possible.
1090 if (Subtarget->hasSinCos()) {
1091 setLibcallName(RTLIB::SINCOS_F32, "sincosf");
1092 setLibcallName(RTLIB::SINCOS_F64, "sincos");
1093 if (Subtarget->isTargetWatchABI()) {
1094 setLibcallCallingConv(RTLIB::SINCOS_F32, CallingConv::ARM_AAPCS_VFP);
1095 setLibcallCallingConv(RTLIB::SINCOS_F64, CallingConv::ARM_AAPCS_VFP);
1096 }
1097 if (Subtarget->isTargetIOS() || Subtarget->isTargetWatchOS()) {
1098 // For iOS, we don't want the normal expansion of a libcall to
1099 // sincos. We want to issue a libcall to __sincos_stret.
1100 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1101 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1102 }
1103 }
1104
1105 // FP-ARMv8 implements a lot of rounding-like FP operations.
1106 if (Subtarget->hasFPARMv8()) {
1107 setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
1108 setOperationAction(ISD::FCEIL, MVT::f32, Legal);
1109 setOperationAction(ISD::FROUND, MVT::f32, Legal);
1110 setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
1111 setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
1112 setOperationAction(ISD::FRINT, MVT::f32, Legal);
1113 setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
1114 setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
1115 setOperationAction(ISD::FMINNUM, MVT::v2f32, Legal);
1116 setOperationAction(ISD::FMAXNUM, MVT::v2f32, Legal);
1117 setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
1118 setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);
1119
1120 if (!Subtarget->isFPOnlySP()) {
1121 setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
1122 setOperationAction(ISD::FCEIL, MVT::f64, Legal);
1123 setOperationAction(ISD::FROUND, MVT::f64, Legal);
1124 setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
1125 setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
1126 setOperationAction(ISD::FRINT, MVT::f64, Legal);
1127 setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
1128 setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
1129 }
1130 }
1131
1132 if (Subtarget->hasNEON()) {
1133 // vmin and vmax aren't available in a scalar form, so we use
1134 // a NEON instruction with an undef lane instead.
1135 setOperationAction(ISD::FMINNAN, MVT::f32, Legal);
1136 setOperationAction(ISD::FMAXNAN, MVT::f32, Legal);
1137 setOperationAction(ISD::FMINNAN, MVT::v2f32, Legal);
1138 setOperationAction(ISD::FMAXNAN, MVT::v2f32, Legal);
1139 setOperationAction(ISD::FMINNAN, MVT::v4f32, Legal);
1140 setOperationAction(ISD::FMAXNAN, MVT::v4f32, Legal);
1141 }
1142
1143 // We have target-specific dag combine patterns for the following nodes:
1144 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
1145 setTargetDAGCombine(ISD::ADD);
1146 setTargetDAGCombine(ISD::SUB);
1147 setTargetDAGCombine(ISD::MUL);
1148 setTargetDAGCombine(ISD::AND);
1149 setTargetDAGCombine(ISD::OR);
1150 setTargetDAGCombine(ISD::XOR);
1151
1152 if (Subtarget->hasV6Ops())
1153 setTargetDAGCombine(ISD::SRL);
1154
1155 setStackPointerRegisterToSaveRestore(ARM::SP);
1156
1157 if (Subtarget->useSoftFloat() || Subtarget->isThumb1Only() ||
1158 !Subtarget->hasVFP2())
1159 setSchedulingPreference(Sched::RegPressure);
1160 else
1161 setSchedulingPreference(Sched::Hybrid);
1162
1163 //// temporary - rewrite interface to use type
1164 MaxStoresPerMemset = 8;
1165 MaxStoresPerMemsetOptSize = 4;
1166 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores
1167 MaxStoresPerMemcpyOptSize = 2;
1168 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores
1169 MaxStoresPerMemmoveOptSize = 2;
1170
1171 // On ARM arguments smaller than 4 bytes are extended, so all arguments
1172 // are at least 4 bytes aligned.
1173 setMinStackArgumentAlignment(4);
1174
1175 // Prefer likely predicted branches to selects on out-of-order cores.
1176 PredictableSelectIsExpensive = Subtarget->getSchedModel().isOutOfOrder();
1177
1178 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2);
1179}
1180
1181bool ARMTargetLowering::useSoftFloat() const {
1182 return Subtarget->useSoftFloat();
1183}
1184
1185// FIXME: It might make sense to define the representative register class as the
1186// nearest super-register that has a non-null superset. For example, DPR_VFP2 is
1187// a super-register of SPR, and DPR is a superset of DPR_VFP2. Consequently,
1188// SPR's representative would be DPR_VFP2. This should work well if register
1189// pressure tracking were modified such that a register use would increment the
1190// pressure of the register class's representative and all of its super
1191// classes' representatives transitively. We have not implemented this because
1192// of the difficulty prior to coalescing of modeling operand register classes
1193// due to the common occurrence of cross class copies and subregister insertions
1194// and extractions.
1195std::pair<const TargetRegisterClass *, uint8_t>
1196ARMTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
1197 MVT VT) const {
1198 const TargetRegisterClass *RRC = nullptr;
1199 uint8_t Cost = 1;
1200 switch (VT.SimpleTy) {
1201 default:
1202 return TargetLowering::findRepresentativeClass(TRI, VT);
1203 // Use DPR as representative register class for all floating point
1204// and vector types. Since there are 32 SPR registers and 32 DPR registers,
1205 // the cost is 1 for both f32 and f64.
1206 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
1207 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
1208 RRC = &ARM::DPRRegClass;
1209 // When NEON is used for SP, only half of the register file is available
1210 // because operations that define both SP and DP results will be constrained
1211 // to the VFP2 class (D0-D15). We currently model this constraint prior to
1212 // coalescing by double-counting the SP regs. See the FIXME above.
1213 if (Subtarget->useNEONForSinglePrecisionFP())
1214 Cost = 2;
1215 break;
1216 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
1217 case MVT::v4f32: case MVT::v2f64:
1218 RRC = &ARM::DPRRegClass;
1219 Cost = 2;
1220 break;
1221 case MVT::v4i64:
1222 RRC = &ARM::DPRRegClass;
1223 Cost = 4;
1224 break;
1225 case MVT::v8i64:
1226 RRC = &ARM::DPRRegClass;
1227 Cost = 8;
1228 break;
1229 }
1230 return std::make_pair(RRC, Cost);
1231}
1232
1233const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
1234 switch ((ARMISD::NodeType)Opcode) {
1235 case ARMISD::FIRST_NUMBER: break;
1236 case ARMISD::Wrapper: return "ARMISD::Wrapper";
1237 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC";
1238 case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
1239 case ARMISD::COPY_STRUCT_BYVAL: return "ARMISD::COPY_STRUCT_BYVAL";
1240 case ARMISD::CALL: return "ARMISD::CALL";
1241 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
1242 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
1243 case ARMISD::BRCOND: return "ARMISD::BRCOND";
1244 case ARMISD::BR_JT: return "ARMISD::BR_JT";
1245 case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
1246 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
1247 case ARMISD::INTRET_FLAG: return "ARMISD::INTRET_FLAG";
1248 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
1249 case ARMISD::CMP: return "ARMISD::CMP";
1250 case ARMISD::CMN: return "ARMISD::CMN";
1251 case ARMISD::CMPZ: return "ARMISD::CMPZ";
1252 case ARMISD::CMPFP: return "ARMISD::CMPFP";
1253 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
1254 case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
1255 case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
1256
1257 case ARMISD::CMOV: return "ARMISD::CMOV";
1258
1259 case ARMISD::SSAT: return "ARMISD::SSAT";
1260
1261 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
1262 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
1263 case ARMISD::RRX: return "ARMISD::RRX";
1264
1265 case ARMISD::ADDC: return "ARMISD::ADDC";
1266 case ARMISD::ADDE: return "ARMISD::ADDE";
1267 case ARMISD::SUBC: return "ARMISD::SUBC";
1268 case ARMISD::SUBE: return "ARMISD::SUBE";
1269
1270 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
1271 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
1272
1273 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
1274 case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";
1275 case ARMISD::EH_SJLJ_SETUP_DISPATCH: return "ARMISD::EH_SJLJ_SETUP_DISPATCH";
1276
1277 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";
1278
1279 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER";
1280
1281 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
1282
1283 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
1284
1285 case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
1286
1287 case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
1288 case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
1289
1290 case ARMISD::VCEQ: return "ARMISD::VCEQ";
1291 case ARMISD::VCEQZ: return "ARMISD::VCEQZ";
1292 case ARMISD::VCGE: return "ARMISD::VCGE";
1293 case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
1294 case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
1295 case ARMISD::VCGEU: return "ARMISD::VCGEU";
1296 case ARMISD::VCGT: return "ARMISD::VCGT";
1297 case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
1298 case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
1299 case ARMISD::VCGTU: return "ARMISD::VCGTU";
1300 case ARMISD::VTST: return "ARMISD::VTST";
1301
1302 case ARMISD::VSHL: return "ARMISD::VSHL";
1303 case ARMISD::VSHRs: return "ARMISD::VSHRs";
1304 case ARMISD::VSHRu: return "ARMISD::VSHRu";
1305 case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
1306 case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
1307 case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
1308 case ARMISD::VQSHLs: return "ARMISD::VQSHLs";
1309 case ARMISD::VQSHLu: return "ARMISD::VQSHLu";
1310 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu";
1311 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs";
1312 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu";
1313 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu";
1314 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs";
1315 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu";
1316 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu";
1317 case ARMISD::VSLI: return "ARMISD::VSLI";
1318 case ARMISD::VSRI: return "ARMISD::VSRI";
1319 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu";
1320 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs";
1321 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM";
1322 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM";
1323 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM";
1324 case ARMISD::VDUP: return "ARMISD::VDUP";
1325 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE";
1326 case ARMISD::VEXT: return "ARMISD::VEXT";
1327 case ARMISD::VREV64: return "ARMISD::VREV64";
1328 case ARMISD::VREV32: return "ARMISD::VREV32";
1329 case ARMISD::VREV16: return "ARMISD::VREV16";
1330 case ARMISD::VZIP: return "ARMISD::VZIP";
1331 case ARMISD::VUZP: return "ARMISD::VUZP";
1332 case ARMISD::VTRN: return "ARMISD::VTRN";
1333 case ARMISD::VTBL1: return "ARMISD::VTBL1";
1334 case ARMISD::VTBL2: return "ARMISD::VTBL2";
1335 case ARMISD::VMULLs: return "ARMISD::VMULLs";
1336 case ARMISD::VMULLu: return "ARMISD::VMULLu";
1337 case ARMISD::UMAAL: return "ARMISD::UMAAL";
1338 case ARMISD::UMLAL: return "ARMISD::UMLAL";
1339 case ARMISD::SMLAL: return "ARMISD::SMLAL";
1340 case ARMISD::SMLALBB: return "ARMISD::SMLALBB";
1341 case ARMISD::SMLALBT: return "ARMISD::SMLALBT";
1342 case ARMISD::SMLALTB: return "ARMISD::SMLALTB";
1343 case ARMISD::SMLALTT: return "ARMISD::SMLALTT";
1344 case ARMISD::SMULWB: return "ARMISD::SMULWB";
1345 case ARMISD::SMULWT: return "ARMISD::SMULWT";
1346 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR";
1347 case ARMISD::BFI: return "ARMISD::BFI";
1348 case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
1349 case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1350 case ARMISD::VBSL: return "ARMISD::VBSL";
1351 case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
1352 case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
1353 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
1354 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
1355 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
1356 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD";
1357 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD";
1358 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD";
1359 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD";
1360 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
1361 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
1362 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
1363 case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
1364 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
1365 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
1366 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
1367 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD";
1368 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD";
1369 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD";
1370 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD";
1371 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD";
1372 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD";
1373 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD";
1374 }
1375 return nullptr;
1376}
1377
1378EVT ARMTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1379 EVT VT) const {
1380 if (!VT.isVector())
1381 return getPointerTy(DL);
1382 return VT.changeVectorElementTypeToInteger();
1383}
1384
1385/// getRegClassFor - Return the register class that should be used for the
1386/// specified value type.
1387const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
1388 // Map v4i64 to QQ registers but do not make the type legal. Similarly map
1389 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
1390 // load / store 4 to 8 consecutive D registers.
1391 if (Subtarget->hasNEON()) {
1392 if (VT == MVT::v4i64)
1393 return &ARM::QQPRRegClass;
1394 if (VT == MVT::v8i64)
1395 return &ARM::QQQQPRRegClass;
1396 }
1397 return TargetLowering::getRegClassFor(VT);
1398}
1399
1400// memcpy, and other memory intrinsics, typically try to use LDM/STM if the
1401// source/dest is aligned and the copy size is large enough. We therefore want
1402// to align such objects passed to memory intrinsics.
1403bool ARMTargetLowering::shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize,
1404 unsigned &PrefAlign) const {
1405 if (!isa<MemIntrinsic>(CI))
1406 return false;
1407 MinSize = 8;
1408 // On ARM11 onwards (excluding M class) 8-byte aligned LDM is typically 1
1409 // cycle faster than 4-byte aligned LDM.
1410 PrefAlign = (Subtarget->hasV6Ops() && !Subtarget->isMClass() ? 8 : 4);
1411 return true;
1412}
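// Illustrative sketch (an assumption, not from the original source): on a
// Cortex-A8 (ARMv7-A, not M-class), a call such as
//   memcpy(dst, src, 64);
// reports MinSize = 8 and PrefAlign = 8, so pointer arguments backed by
// objects of at least 8 bytes get bumped to 8-byte alignment, letting the
// copy be expanded with LDM/STM.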
1413
1414// Create a fast isel object.
1415FastISel *
1416ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
1417 const TargetLibraryInfo *libInfo) const {
1418 return ARM::createFastISel(funcInfo, libInfo);
1419}
1420
1421Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
1422 unsigned NumVals = N->getNumValues();
1423 if (!NumVals)
1424 return Sched::RegPressure;
1425
1426 for (unsigned i = 0; i != NumVals; ++i) {
1427 EVT VT = N->getValueType(i);
1428 if (VT == MVT::Glue || VT == MVT::Other)
1429 continue;
1430 if (VT.isFloatingPoint() || VT.isVector())
1431 return Sched::ILP;
1432 }
1433
1434 if (!N->isMachineOpcode())
1435 return Sched::RegPressure;
1436
1437 // Loads are scheduled for latency even if the instruction itinerary
1438 // is not available.
1439 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1440 const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
1441
1442 if (MCID.getNumDefs() == 0)
1443 return Sched::RegPressure;
1444 if (!Itins->isEmpty() &&
1445 Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
1446 return Sched::ILP;
1447
1448 return Sched::RegPressure;
1449}
1450
1451//===----------------------------------------------------------------------===//
1452// Lowering Code
1453//===----------------------------------------------------------------------===//
1454
1455static bool isSRL16(const SDValue &Op) {
1456 if (Op.getOpcode() != ISD::SRL)
1457 return false;
1458 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1459 return Const->getZExtValue() == 16;
1460 return false;
1461}
1462
1463static bool isSRA16(const SDValue &Op) {
1464 if (Op.getOpcode() != ISD::SRA)
1465 return false;
1466 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1467 return Const->getZExtValue() == 16;
1468 return false;
1469}
1470
1471static bool isSHL16(const SDValue &Op) {
1472 if (Op.getOpcode() != ISD::SHL)
1473 return false;
1474 if (auto Const = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
1475 return Const->getZExtValue() == 16;
1476 return false;
1477}
1478
1479// Check for a signed 16-bit value. We special case SRA because it keeps
1480// things simpler when also looking for SRAs that aren't sign-extending a
1481// smaller value. Without the check, we'd need to take extra care with
1482// checking order for some operations.
1483static bool isS16(const SDValue &Op, SelectionDAG &DAG) {
1484 if (isSRA16(Op))
1485 return isSHL16(Op.getOperand(0));
1486 return DAG.ComputeNumSignBits(Op) == 17;
1487}
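// Illustrative sketch (an assumption, not part of the original source): the
// kind of DAG this recognizes corresponds to IR such as
//   %lo = shl i32 %x, 16
//   %s  = ashr i32 %lo, 16   ; a sign-extended 16-bit value
// Here isSRA16(%s) and isSHL16(%lo) both hold, so isS16 returns true without
// consulting ComputeNumSignBits; any other value must show exactly 17 sign
// bits to qualify.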
1488
1489/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
1490static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
1491 switch (CC) {
1492 default: llvm_unreachable("Unknown condition code!");
1493 case ISD::SETNE: return ARMCC::NE;
1494 case ISD::SETEQ: return ARMCC::EQ;
1495 case ISD::SETGT: return ARMCC::GT;
1496 case ISD::SETGE: return ARMCC::GE;
1497 case ISD::SETLT: return ARMCC::LT;
1498 case ISD::SETLE: return ARMCC::LE;
1499 case ISD::SETUGT: return ARMCC::HI;
1500 case ISD::SETUGE: return ARMCC::HS;
1501 case ISD::SETULT: return ARMCC::LO;
1502 case ISD::SETULE: return ARMCC::LS;
1503 }
1504}
1505
1506/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
1507static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
1508 ARMCC::CondCodes &CondCode2, bool &InvalidOnQNaN) {
1509 CondCode2 = ARMCC::AL;
1510 InvalidOnQNaN = true;
1511 switch (CC) {
1512 default: llvm_unreachable("Unknown FP condition!");
1513 case ISD::SETEQ:
1514 case ISD::SETOEQ:
1515 CondCode = ARMCC::EQ;
1516 InvalidOnQNaN = false;
1517 break;
1518 case ISD::SETGT:
1519 case ISD::SETOGT: CondCode = ARMCC::GT; break;
1520 case ISD::SETGE:
1521 case ISD::SETOGE: CondCode = ARMCC::GE; break;
1522 case ISD::SETOLT: CondCode = ARMCC::MI; break;
1523 case ISD::SETOLE: CondCode = ARMCC::LS; break;
1524 case ISD::SETONE:
1525 CondCode = ARMCC::MI;
1526 CondCode2 = ARMCC::GT;
1527 InvalidOnQNaN = false;
1528 break;
1529 case ISD::SETO: CondCode = ARMCC::VC; break;
1530 case ISD::SETUO: CondCode = ARMCC::VS; break;
1531 case ISD::SETUEQ:
1532 CondCode = ARMCC::EQ;
1533 CondCode2 = ARMCC::VS;
1534 InvalidOnQNaN = false;
1535 break;
1536 case ISD::SETUGT: CondCode = ARMCC::HI; break;
1537 case ISD::SETUGE: CondCode = ARMCC::PL; break;
1538 case ISD::SETLT:
1539 case ISD::SETULT: CondCode = ARMCC::LT; break;
1540 case ISD::SETLE:
1541 case ISD::SETULE: CondCode = ARMCC::LE; break;
1542 case ISD::SETNE:
1543 case ISD::SETUNE:
1544 CondCode = ARMCC::NE;
1545 InvalidOnQNaN = false;
1546 break;
1547 }
1548}
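// Illustrative sketch (an assumption, not part of the original source):
// SETONE ("ordered and not equal") has no single ARM condition, so it lowers
// to two checks, CondCode = MI (ordered less-than after a VFP compare and
// FMSTAT) and CondCode2 = GT; the result holds if either condition is true.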
1549
1550//===----------------------------------------------------------------------===//
1551// Calling Convention Implementation
1552//===----------------------------------------------------------------------===//
1553
1554#include "ARMGenCallingConv.inc"
1555
1556/// getEffectiveCallingConv - Get the effective calling convention, taking into
1557/// account presence of floating point hardware and calling convention
1558/// limitations, such as support for variadic functions.
1559CallingConv::ID
1560ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
1561 bool isVarArg) const {
1562 switch (CC) {
1563 default:
1564 llvm_unreachable("Unsupported calling convention");
1565 case CallingConv::ARM_AAPCS:
1566 case CallingConv::ARM_APCS:
1567 case CallingConv::GHC:
1568 return CC;
1569 case CallingConv::PreserveMost:
1570 return CallingConv::PreserveMost;
1571 case CallingConv::ARM_AAPCS_VFP:
1572 case CallingConv::Swift:
1573 return isVarArg ? CallingConv::ARM_AAPCS : CallingConv::ARM_AAPCS_VFP;
1574 case CallingConv::C:
1575 if (!Subtarget->isAAPCS_ABI())
1576 return CallingConv::ARM_APCS;
1577 else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() &&
1578 getTargetMachine().Options.FloatABIType == FloatABI::Hard &&
1579 !isVarArg)
1580 return CallingConv::ARM_AAPCS_VFP;
1581 else
1582 return CallingConv::ARM_AAPCS;
1583 case CallingConv::Fast:
1584 case CallingConv::CXX_FAST_TLS:
1585 if (!Subtarget->isAAPCS_ABI()) {
1586 if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1587 return CallingConv::Fast;
1588 return CallingConv::ARM_APCS;
1589 } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
1590 return CallingConv::ARM_AAPCS_VFP;
1591 else
1592 return CallingConv::ARM_AAPCS;
1593 }
1594}
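// Illustrative mapping (an assumption, not part of the original source), for
// an AAPCS target with VFP2, not Thumb1-only, and a hard-float ABI:
//   CallingConv::C,     !isVarArg  ->  CallingConv::ARM_AAPCS_VFP
//   CallingConv::C,      isVarArg  ->  CallingConv::ARM_AAPCS
//   CallingConv::Swift,  isVarArg  ->  CallingConv::ARM_AAPCS
//   CallingConv::Fast,  !isVarArg  ->  CallingConv::ARM_AAPCS_VFP
// Variadic calls fall back to the integer-register AAPCS rules on this
// subtarget, since VFP argument registers cannot be used for varargs.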
1595
1596CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1597 bool isVarArg) const {
1598 return CCAssignFnForNode(CC, false, isVarArg);
1599}
1600
1601CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
1602 bool isVarArg) const {
1603 return CCAssignFnForNode(CC, true, isVarArg);
1604}
1605
1606/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
1607/// CallingConvention.
1608CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
1609 bool Return,
1610 bool isVarArg) const {
1611 switch (getEffectiveCallingConv(CC, isVarArg)) {
1612 default:
1613 llvm_unreachable("Unsupported calling convention");
1614 case CallingConv::ARM_APCS:
1615 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
1616 case CallingConv::ARM_AAPCS:
1617 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1618 case CallingConv::ARM_AAPCS_VFP:
1619 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
1620 case CallingConv::Fast:
1621 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
1622 case CallingConv::GHC:
1623 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC);
1624 case CallingConv::PreserveMost:
1625 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
1626 }
1627}
1628
1629/// LowerCallResult - Lower the result values of a call into the
1630/// appropriate copies out of appropriate physical registers.
1631SDValue ARMTargetLowering::LowerCallResult(
1632 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
1633 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1634 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
1635 SDValue ThisVal) const {
1636
1637 // Assign locations to each value returned by this call.
1638 SmallVector<CCValAssign, 16> RVLocs;
1639 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1640 *DAG.getContext());
1641 CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
1642
1643 // Copy all of the result registers out of their specified physreg.
1644 for (unsigned i = 0; i != RVLocs.size(); ++i) {
1645 CCValAssign VA = RVLocs[i];
1646
1647 // Pass the 'this' value directly from the argument to the return value,
1648 // to avoid register unit interference.
1649 if (i == 0 && isThisReturn) {
1650 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
1651 "unexpected return calling convention register assignment");
1652 InVals.push_back(ThisVal);
1653 continue;
1654 }
1655
1656 SDValue Val;
1657 if (VA.needsCustom()) {
1658 // Handle f64 or half of a v2f64.
1659 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1660 InFlag);
1661 Chain = Lo.getValue(1);
1662 InFlag = Lo.getValue(2);
1663 VA = RVLocs[++i]; // skip ahead to next loc
1664 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
1665 InFlag);
1666 Chain = Hi.getValue(1);
1667 InFlag = Hi.getValue(2);
1668 if (!Subtarget->isLittle())
1669 std::swap (Lo, Hi);
1670 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1671
1672 if (VA.getLocVT() == MVT::v2f64) {
1673 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
1674 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1675 DAG.getConstant(0, dl, MVT::i32));
1676
1677 VA = RVLocs[++i]; // skip ahead to next loc
1678 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1679 Chain = Lo.getValue(1);
1680 InFlag = Lo.getValue(2);
1681 VA = RVLocs[++i]; // skip ahead to next loc
1682 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
1683 Chain = Hi.getValue(1);
1684 InFlag = Hi.getValue(2);
1685 if (!Subtarget->isLittle())
1686 std::swap (Lo, Hi);
1687 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
1688 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
1689 DAG.getConstant(1, dl, MVT::i32));
1690 }
1691 } else {
1692 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
1693 InFlag);
1694 Chain = Val.getValue(1);
1695 InFlag = Val.getValue(2);
1696 }
1697
1698 switch (VA.getLocInfo()) {
1699 default: llvm_unreachable("Unknown loc info!");
1700 case CCValAssign::Full: break;
1701 case CCValAssign::BCvt:
1702 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
1703 break;
1704 }
1705
1706 InVals.push_back(Val);
1707 }
1708
1709 return Chain;
1710}
1711
1712/// LowerMemOpCallTo - Store the argument to the stack.
1713SDValue ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1714 SDValue Arg, const SDLoc &dl,
1715 SelectionDAG &DAG,
1716 const CCValAssign &VA,
1717 ISD::ArgFlagsTy Flags) const {
1718 unsigned LocMemOffset = VA.getLocMemOffset();
1719 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1720 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1721 StackPtr, PtrOff);
1722 return DAG.getStore(
1723 Chain, dl, Arg, PtrOff,
1724 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
1725}
1726
1727void ARMTargetLowering::PassF64ArgInRegs(const SDLoc &dl, SelectionDAG &DAG,
1728 SDValue Chain, SDValue &Arg,
1729 RegsToPassVector &RegsToPass,
1730 CCValAssign &VA, CCValAssign &NextVA,
1731 SDValue &StackPtr,
1732 SmallVectorImpl<SDValue> &MemOpChains,
1733 ISD::ArgFlagsTy Flags) const {
1734
1735 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
1736 DAG.getVTList(MVT::i32, MVT::i32), Arg);
1737 unsigned id = Subtarget->isLittle() ? 0 : 1;
1738 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd.getValue(id)));
1739
1740 if (NextVA.isRegLoc())
1741 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1-id)));
1742 else {
1743 assert(NextVA.isMemLoc());
1744 if (!StackPtr.getNode())
1745 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP,
1746 getPointerTy(DAG.getDataLayout()));
1747
1748 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1-id),
1749 dl, DAG, NextVA,
1750 Flags));
1751 }
1752}
1753
1754/// LowerCall - Lower a call into a callseq_start <-
1755/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
1756/// nodes.
1757SDValue
1758ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1759 SmallVectorImpl<SDValue> &InVals) const {
1760 SelectionDAG &DAG = CLI.DAG;
1761 SDLoc &dl = CLI.DL;
1762 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1763 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1764 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1765 SDValue Chain = CLI.Chain;
1766 SDValue Callee = CLI.Callee;
1767 bool &isTailCall = CLI.IsTailCall;
1768 CallingConv::ID CallConv = CLI.CallConv;
1769 bool doesNotRet = CLI.DoesNotReturn;
1770 bool isVarArg = CLI.IsVarArg;
1771
1772 MachineFunction &MF = DAG.getMachineFunction();
1773 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
1774 bool isThisReturn = false;
1775 bool isSibCall = false;
1776 auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
1777
1778 // Disable tail calls if they're not supported.
1779 if (!Subtarget->supportsTailCall() || Attr.getValueAsString() == "true")
1780 isTailCall = false;
1781
1782 if (isTailCall) {
1783 // Check if it's really possible to do a tail call.
1784 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
1785 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(),
1786 Outs, OutVals, Ins, DAG);
1787 if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
1788 report_fatal_error("failed to perform tail call elimination on a call "
1789 "site marked musttail");
1790 // We don't support GuaranteedTailCallOpt for ARM, only automatically
1791 // detected sibcalls.
1792 if (isTailCall) {
1793 ++NumTailCalls;
1794 isSibCall = true;
1795 }
1796 }
1797
1798 // Analyze operands of the call, assigning locations to each operand.
1799 SmallVector<CCValAssign, 16> ArgLocs;
1800 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1801 *DAG.getContext());
1802 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
1803
1804 // Get a count of how many bytes are to be pushed on the stack.
1805 unsigned NumBytes = CCInfo.getNextStackOffset();
1806
1807 // For tail calls, memory operands are available in our caller's stack.
1808 if (isSibCall)
1809 NumBytes = 0;
1810
1811 // Adjust the stack pointer for the new arguments...
1812 // These operations are automatically eliminated by the prolog/epilog pass
1813 if (!isSibCall)
1814 Chain = DAG.getCALLSEQ_START(Chain,
1815 DAG.getIntPtrConstant(NumBytes, dl, true), dl);
1816
1817 SDValue StackPtr =
1818 DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy(DAG.getDataLayout()));
1819
1820 RegsToPassVector RegsToPass;
1821 SmallVector<SDValue, 8> MemOpChains;
1822
1823 // Walk the register/memloc assignments, inserting copies/loads. In the case
1824 // of tail call optimization, arguments are handled later.
1825 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
1826 i != e;
1827 ++i, ++realArgIdx) {
1828 CCValAssign &VA = ArgLocs[i];
1829 SDValue Arg = OutVals[realArgIdx];
1830 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
1831 bool isByVal = Flags.isByVal();
1832
1833 // Promote the value if needed.
1834 switch (VA.getLocInfo()) {
1835 default: llvm_unreachable("Unknown loc info!");
1836 case CCValAssign::Full: break;
1837 case CCValAssign::SExt:
1838 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
1839 break;
1840 case CCValAssign::ZExt:
1841 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
1842 break;
1843 case CCValAssign::AExt:
1844 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
1845 break;
1846 case CCValAssign::BCvt:
1847 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
1848 break;
1849 }
1850
1851 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
1852 if (VA.needsCustom()) {
1853 if (VA.getLocVT() == MVT::v2f64) {
1854 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1855 DAG.getConstant(0, dl, MVT::i32));
1856 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
1857 DAG.getConstant(1, dl, MVT::i32));
1858
1859 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
1860 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1861
1862 VA = ArgLocs[++i]; // skip ahead to next loc
1863 if (VA.isRegLoc()) {
1864 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
1865 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
1866 } else {
1867 assert(VA.isMemLoc());
1868
1869 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
1870 dl, DAG, VA, Flags));
1871 }
1872 } else {
1873 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
1874 StackPtr, MemOpChains, Flags);
1875 }
1876 } else if (VA.isRegLoc()) {
1877 if (realArgIdx == 0 && Flags.isReturned() && !Flags.isSwiftSelf() &&
1878 Outs[0].VT == MVT::i32) {
1879 assert(VA.getLocVT() == MVT::i32 &&
1880 "unexpected calling convention register assignment");
1881 assert(!Ins.empty() && Ins[0].VT == MVT::i32 &&
1882 "unexpected use of 'returned'");
1883 isThisReturn = true;
1884 }
1885 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
1886 } else if (isByVal) {
1887 assert(VA.isMemLoc());
1888 unsigned offset = 0;
1889
1890 // True if this byval aggregate will be split between registers
1891 // and memory.
1892 unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
1893 unsigned CurByValIdx = CCInfo.getInRegsParamsProcessed();
1894
1895 if (CurByValIdx < ByValArgsCount) {
1896
1897 unsigned RegBegin, RegEnd;
1898 CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);
1899
1900 EVT PtrVT =
1901 DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1902 unsigned int i, j;
1903 for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
1904 SDValue Const = DAG.getConstant(4*i, dl, MVT::i32);
1905 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
1906 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
1907 MachinePointerInfo(),
1908 DAG.InferPtrAlignment(AddArg));
1909 MemOpChains.push_back(Load.getValue(1));
1910 RegsToPass.push_back(std::make_pair(j, Load));
1911 }
1912
1913 // If the parameter size exceeds the register area, the "offset" value
1914 // helps us calculate the stack slot for the remaining part properly.
1915 offset = RegEnd - RegBegin;
1916
1917 CCInfo.nextInRegsParam();
1918 }
1919
1920 if (Flags.getByValSize() > 4*offset) {
1921 auto PtrVT = getPointerTy(DAG.getDataLayout());
1922 unsigned LocMemOffset = VA.getLocMemOffset();
1923 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1924 SDValue Dst = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, StkPtrOff);
1925 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset, dl);
1926 SDValue Src = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, SrcOffset);
1927 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, dl,
1928 MVT::i32);
1929 SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), dl,
1930 MVT::i32);
1931
1932 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
1933 SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
1934 MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
1935 Ops));
1936 }
1937 } else if (!isSibCall) {
1938 assert(VA.isMemLoc());
1939
1940 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
1941 dl, DAG, VA, Flags));
1942 }
1943 }
1944
1945 if (!MemOpChains.empty())
1946 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
1947
1948 // Build a sequence of copy-to-reg nodes chained together with token chain
1949 // and flag operands which copy the outgoing args into the appropriate regs.
1950 SDValue InFlag;
1951 // Tail call byval lowering might overwrite argument registers, so in the
1952 // case of tail call optimization the copies to registers are lowered later.
1953 if (!isTailCall)
1954 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1955 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1956 RegsToPass[i].second, InFlag);
1957 InFlag = Chain.getValue(1);
1958 }
1959
1960 // For tail calls lower the arguments to the 'real' stack slot.
1961 if (isTailCall) {
1962 // Force all the incoming stack arguments to be loaded from the stack
1963 // before any new outgoing arguments are stored to the stack, because the
1964 // outgoing stack slots may alias the incoming argument stack slots, and
1965 // the alias isn't otherwise explicit. This is slightly more conservative
1966 // than necessary, because it means that each store effectively depends
1967 // on every argument instead of just those arguments it would clobber.
1968
1969 // Do not flag preceding copytoreg stuff together with the following stuff.
1970 InFlag = SDValue();
1971 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1972 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1973 RegsToPass[i].second, InFlag);
1974 InFlag = Chain.getValue(1);
1975 }
1976 InFlag = SDValue();
1977 }
1978
1979 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1980 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1981 // node so that legalize doesn't hack it.
1982 bool isDirect = false;
1983
1984 const TargetMachine &TM = getTargetMachine();
1985 const Module *Mod = MF.getFunction()->getParent();
1986 const GlobalValue *GV = nullptr;
1987 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
1988 GV = G->getGlobal();
1989 bool isStub =
1990 !TM.shouldAssumeDSOLocal(*Mod, GV) && Subtarget->isTargetMachO();
1991
1992 bool isARMFunc = !Subtarget->isThumb() || (isStub && !Subtarget->isMClass());
1993 bool isLocalARMFunc = false;
1994 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
1995 auto PtrVt = getPointerTy(DAG.getDataLayout());
1996
1997 if (Subtarget->genLongCalls()) {
1998 assert((!isPositionIndependent() || Subtarget->isTargetWindows()) &&
1999 "long-calls codegen is not position independent!");
2000 // Handle a global address or an external symbol. If it's not one of
2001 // those, the target's already in a register, so we don't need to do
2002 // anything extra.
2003 if (isa<GlobalAddressSDNode>(Callee)) {
2004 // Create a constant pool entry for the callee address
2005 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2006 ARMConstantPoolValue *CPV =
2007 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0);
2008
2009 // Get the address of the callee into a register
2010 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2011 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2012 Callee = DAG.getLoad(
2013 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2014 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2015 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
2016 const char *Sym = S->getSymbol();
2017
2018 // Create a constant pool entry for the callee address
2019 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2020 ARMConstantPoolValue *CPV =
2021 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2022 ARMPCLabelIndex, 0);
2023 // Get the address of the callee into a register
2024 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2025 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2026 Callee = DAG.getLoad(
2027 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2028 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2029 }
2030 } else if (isa<GlobalAddressSDNode>(Callee)) {
2031 // If we're optimizing for minimum size and the function is called three or
2032 // more times in this block, we can improve codesize by calling indirectly
2033 // as BLXr has a 16-bit encoding.
2034 auto *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
2035 auto *BB = CLI.CS->getParent();
2036 bool PreferIndirect =
2037 Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
2038 count_if(GV->users(), [&BB](const User *U) {
2039 return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
2040 }) > 2;
2041
2042 if (!PreferIndirect) {
2043 isDirect = true;
2044 bool isDef = GV->isStrongDefinitionForLinker();
2045
2046 // ARM call to a local ARM function is predicable.
2047 isLocalARMFunc = !Subtarget->isThumb() && (isDef || !ARMInterworking);
2048 // tBX takes a register source operand.
2049 if (isStub && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2050 assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?");
2051 Callee = DAG.getNode(
2052 ARMISD::WrapperPIC, dl, PtrVt,
2053 DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
2054 Callee = DAG.getLoad(
2055 PtrVt, dl, DAG.getEntryNode(), Callee,
2056 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2057 /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
2058 MachineMemOperand::MOInvariant);
2059 } else if (Subtarget->isTargetCOFF()) {
2060 assert(Subtarget->isTargetWindows() &&
2061 "Windows is the only supported COFF target");
2062 unsigned TargetFlags = GV->hasDLLImportStorageClass()
2063 ? ARMII::MO_DLLIMPORT
2064 : ARMII::MO_NO_FLAG;
2065 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, /*Offset=*/0,
2066 TargetFlags);
2067 if (GV->hasDLLImportStorageClass())
2068 Callee =
2069 DAG.getLoad(PtrVt, dl, DAG.getEntryNode(),
2070 DAG.getNode(ARMISD::Wrapper, dl, PtrVt, Callee),
2071 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
2072 } else {
2073 Callee = DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, 0);
2074 }
2075 }
2076 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
2077 isDirect = true;
2078 // tBX takes a register source operand.
2079 const char *Sym = S->getSymbol();
2080 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
2081 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2082 ARMConstantPoolValue *CPV =
2083 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym,
2084 ARMPCLabelIndex, 4);
2085 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVt, 4);
2086 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
2087 Callee = DAG.getLoad(
2088 PtrVt, dl, DAG.getEntryNode(), CPAddr,
2089 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2090 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2091 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVt, Callee, PICLabel);
2092 } else {
2093 Callee = DAG.getTargetExternalSymbol(Sym, PtrVt, 0);
2094 }
2095 }
2096
2097 // FIXME: handle tail calls differently.
2098 unsigned CallOpc;
2099 if (Subtarget->isThumb()) {
2100 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
2101 CallOpc = ARMISD::CALL_NOLINK;
2102 else
2103 CallOpc = ARMISD::CALL;
2104 } else {
2105 if (!isDirect && !Subtarget->hasV5TOps())
2106 CallOpc = ARMISD::CALL_NOLINK;
2107 else if (doesNotRet && isDirect && Subtarget->hasRetAddrStack() &&
2108 // Emit regular call when code size is the priority
2109 !MF.getFunction()->optForMinSize())
2110 // "mov lr, pc; b _foo" to avoid confusing the RSP
2111 CallOpc = ARMISD::CALL_NOLINK;
2112 else
2113 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL;
2114 }
2115
2116 std::vector<SDValue> Ops;
2117 Ops.push_back(Chain);
2118 Ops.push_back(Callee);
2119
2120 // Add argument registers to the end of the list so that they are known live
2121 // into the call.
2122 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2123 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2124 RegsToPass[i].second.getValueType()));
2125
2126 // Add a register mask operand representing the call-preserved registers.
2127 if (!isTailCall) {
2128 const uint32_t *Mask;
2129 const ARMBaseRegisterInfo *ARI = Subtarget->getRegisterInfo();
2130 if (isThisReturn) {
2131 // For 'this' returns, use the R0-preserving mask if applicable
2132 Mask = ARI->getThisReturnPreservedMask(MF, CallConv);
2133 if (!Mask) {
2134 // Set isThisReturn to false if the calling convention is not one that
2135 // allows 'returned' to be modeled in this way, so LowerCallResult does
2136 // not try to pass 'this' straight through
2137 isThisReturn = false;
2138 Mask = ARI->getCallPreservedMask(MF, CallConv);
2139 }
2140 } else
2141 Mask = ARI->getCallPreservedMask(MF, CallConv);
2142
2143 assert(Mask && "Missing call preserved mask for calling convention");
2144 Ops.push_back(DAG.getRegisterMask(Mask));
2145 }
2146
2147 if (InFlag.getNode())
2148 Ops.push_back(InFlag);
2149
2150 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2151 if (isTailCall) {
2152 MF.getFrameInfo().setHasTailCall();
2153 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
2154 }
2155
2156 // Returns a chain and a flag for retval copy to use.
2157 Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
2158 InFlag = Chain.getValue(1);
2159
2160 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
2161 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
2162 if (!Ins.empty())
2163 InFlag = Chain.getValue(1);
2164
2165 // Handle result values, copying them out of physregs into vregs that we
2166 // return.
2167 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
2168 InVals, isThisReturn,
2169 isThisReturn ? OutVals[0] : SDValue());
2170}
2171
2172/// HandleByVal - Every parameter *after* a byval parameter is passed
2173/// on the stack. Remember the next parameter register to allocate,
2174/// and then confiscate the rest of the parameter registers to ensure
2175/// this.
2176void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
2177 unsigned Align) const {
2178 // Byval (as with any stack) slots are always at least 4 byte aligned.
2179 Align = std::max(Align, 4U);
2180
2181 unsigned Reg = State->AllocateReg(GPRArgRegs);
2182 if (!Reg)
2183 return;
2184
2185 unsigned AlignInRegs = Align / 4;
2186 unsigned Waste = (ARM::R4 - Reg) % AlignInRegs;
2187 for (unsigned i = 0; i < Waste; ++i)
2188 Reg = State->AllocateReg(GPRArgRegs);
2189
2190 if (!Reg)
2191 return;
2192
2193 unsigned Excess = 4 * (ARM::R4 - Reg);
2194
2195 // Special case when NSAA != SP and the parameter size is greater than the
2196 // size of all remaining GPR regs. In that case we can't split the parameter;
2197 // we must send it to the stack. We also must set NCRN to R4, so we waste all
2198 // remaining registers.
2199 const unsigned NSAAOffset = State->getNextStackOffset();
2200 if (NSAAOffset != 0 && Size > Excess) {
2201 while (State->AllocateReg(GPRArgRegs))
2202 ;
2203 return;
2204 }
2205
2206 // The first register for the byval parameter is the first register that
2207 // wasn't allocated before this method call, so it is "reg".
2208 // If the parameter is small enough to be saved in the range [reg, r4), then
2209 // the end (first after last) register is reg + param-size-in-regs;
2210 // otherwise the parameter is split between registers and stack, and the
2211 // end register is r4 in that case.
2212 unsigned ByValRegBegin = Reg;
2213 unsigned ByValRegEnd = std::min<unsigned>(Reg + Size / 4, ARM::R4);
2214 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd);
2215 // Note, the first register was already allocated at the beginning of the
2216 // function, so allocate the remaining registers we need.
2217 for (unsigned i = Reg + 1; i != ByValRegEnd; ++i)
2218 State->AllocateReg(GPRArgRegs);
2219 // A byval parameter that is split between registers and memory needs its
2220 // size truncated here.
2221 // In the case where the entire structure fits in registers, we set the
2222 // size in memory to zero.
2223 Size = std::max<int>(Size - Excess, 0);
2224}
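// Illustrative sketch (an assumption, not part of the original source): for a
// 20-byte byval argument when r1 is the next free argument register and no
// arguments have been placed on the stack yet (NSAAOffset == 0), r1-r3 carry
// the first 12 bytes (Excess = 12), the remaining 8 bytes go on the stack,
// and Size is truncated to 8 here. If the structure fit entirely in the
// remaining registers, Size would become 0.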
2225
2226/// MatchingStackOffset - Return true if the given stack call argument is
2227/// already available in the same position (relatively) of the caller's
2228/// incoming argument stack.
2229static
2230bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2231 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2232 const TargetInstrInfo *TII) {
2233 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2234 int FI = std::numeric_limits<int>::max();
2235 if (Arg.getOpcode() == ISD::CopyFromReg) {
2236 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2237 if (!TargetRegisterInfo::isVirtualRegister(VR))
2238 return false;
2239 MachineInstr *Def = MRI->getVRegDef(VR);
2240 if (!Def)
2241 return false;
2242 if (!Flags.isByVal()) {
2243 if (!TII->isLoadFromStackSlot(*Def, FI))
2244 return false;
2245 } else {
2246 return false;
2247 }
2248 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2249 if (Flags.isByVal())
2250 // ByVal argument is passed in as a pointer but it's now being
2251 // dereferenced. e.g.
2252 // define @foo(%struct.X* %A) {
2253 // tail call @bar(%struct.X* byval %A)
2254 // }
2255 return false;
2256 SDValue Ptr = Ld->getBasePtr();
2257 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2258 if (!FINode)
2259 return false;
2260 FI = FINode->getIndex();
2261 } else
2262 return false;
2263
2264 assert(FI != std::numeric_limits<int>::max());
2265 if (!MFI.isFixedObjectIndex(FI))
2266 return false;
2267 return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
2268}
2269
2270/// IsEligibleForTailCallOptimization - Check whether the call is eligible
2271/// for tail call optimization. Targets which want to do tail call
2272/// optimization should implement this function.
2273bool
2274ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
2275 CallingConv::ID CalleeCC,
2276 bool isVarArg,
2277 bool isCalleeStructRet,
2278 bool isCallerStructRet,
2279 const SmallVectorImpl<ISD::OutputArg> &Outs,
2280 const SmallVectorImpl<SDValue> &OutVals,
2281 const SmallVectorImpl<ISD::InputArg> &Ins,
2282 SelectionDAG& DAG) const {
2283 MachineFunction &MF = DAG.getMachineFunction();
2284 const Function *CallerF = MF.getFunction();
2285 CallingConv::ID CallerCC = CallerF->getCallingConv();
2286
2287 assert(Subtarget->supportsTailCall());
2288
2289 // Look for obvious safe cases to perform tail call optimization that do not
2290 // require ABI changes. This is what gcc calls sibcall.
2291
2292 // Exception-handling functions need a special set of instructions to indicate
2293 // a return to the hardware. Tail-calling another function would probably
2294 // break this.
2295 if (CallerF->hasFnAttribute("interrupt"))
2296 return false;
2297
2298 // Also avoid sibcall optimization if either caller or callee uses struct
2299 // return semantics.
2300 if (isCalleeStructRet || isCallerStructRet)
2301 return false;
2302
2303 // Externally-defined functions with weak linkage should not be
2304 // tail-called on ARM when the OS does not support dynamic
2305 // pre-emption of symbols, as the AAELF spec requires normal calls
2306 // to undefined weak functions to be replaced with a NOP or jump to the
2307 // next instruction. The behaviour of branch instructions in this
2308 // situation (as used for tail calls) is implementation-defined, so we
2309 // cannot rely on the linker replacing the tail call with a return.
2310 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2311 const GlobalValue *GV = G->getGlobal();
2312 const Triple &TT = getTargetMachine().getTargetTriple();
2313 if (GV->hasExternalWeakLinkage() &&
2314 (!TT.isOSWindows() || TT.isOSBinFormatELF() || TT.isOSBinFormatMachO()))
2315 return false;
2316 }
2317
2318 // Check that the call results are passed in the same way.
2319 LLVMContext &C = *DAG.getContext();
2320 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2321 CCAssignFnForReturn(CalleeCC, isVarArg),
2322 CCAssignFnForReturn(CallerCC, isVarArg)))
2323 return false;
2324 // The callee has to preserve all registers the caller needs to preserve.
2325 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2326 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2327 if (CalleeCC != CallerCC) {
2328 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2329 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2330 return false;
2331 }
2332
2333 // If Caller's vararg or byval argument has been split between registers and
2334 // stack, do not perform tail call, since part of the argument is in caller's
2335 // local frame.
2336 const ARMFunctionInfo *AFI_Caller = MF.getInfo<ARMFunctionInfo>();
2337 if (AFI_Caller->getArgRegsSaveSize())
2338 return false;
2339
2340 // If the callee takes no arguments then go on to check the results of the
2341 // call.
2342 if (!Outs.empty()) {
2343 // Check if stack adjustment is needed. For now, do not do this if any
2344 // argument is passed on the stack.
2345 SmallVector<CCValAssign, 16> ArgLocs;
2346 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
2347 CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
2348 if (CCInfo.getNextStackOffset()) {
2349 // Check if the arguments are already laid out in the right way as
2350 // the caller's fixed stack objects.
2351 MachineFrameInfo &MFI = MF.getFrameInfo();
2352 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2353 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2354 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
2355 i != e;
2356 ++i, ++realArgIdx) {
2357 CCValAssign &VA = ArgLocs[i];
2358 EVT RegVT = VA.getLocVT();
2359 SDValue Arg = OutVals[realArgIdx];
2360 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
2361 if (VA.getLocInfo() == CCValAssign::Indirect)
2362 return false;
2363 if (VA.needsCustom()) {
2364 // f64 and vector types are split into multiple registers or
2365 // register/stack-slot combinations. The types will not match
2366 // the registers; give up on memory f64 refs until we figure
2367 // out what to do about this.
2368 if (!VA.isRegLoc())
2369 return false;
2370 if (!ArgLocs[++i].isRegLoc())
2371 return false;
2372 if (RegVT == MVT::v2f64) {
2373 if (!ArgLocs[++i].isRegLoc())
2374 return false;
2375 if (!ArgLocs[++i].isRegLoc())
2376 return false;
2377 }
2378 } else if (!VA.isRegLoc()) {
2379 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
2380 MFI, MRI, TII))
2381 return false;
2382 }
2383 }
2384 }
2385
2386 const MachineRegisterInfo &MRI = MF.getRegInfo();
2387 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2388 return false;
2389 }
2390
2391 return true;
2392}
2393
2394bool
2395ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
2396 MachineFunction &MF, bool isVarArg,
2397 const SmallVectorImpl<ISD::OutputArg> &Outs,
2398 LLVMContext &Context) const {
2399 SmallVector<CCValAssign, 16> RVLocs;
2400 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2401 return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2402}
2403
2404static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
2405 const SDLoc &DL, SelectionDAG &DAG) {
2406 const MachineFunction &MF = DAG.getMachineFunction();
2407 const Function *F = MF.getFunction();
2408
2409 StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString();
2410
2411 // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset
2412 // version of the "preferred return address". These offsets affect the return
2413 // instruction if this is a return from PL1 without hypervisor extensions.
2414 // IRQ/FIQ: +4 "subs pc, lr, #4"
2415 // SWI: 0 "subs pc, lr, #0"
2416 // ABORT: +4 "subs pc, lr, #4"
2417 // UNDEF: +4/+2 "subs pc, lr, #0"
2418 // UNDEF varies depending on whether the exception came from ARM or Thumb
2419 // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0.
2420
2421 int64_t LROffset;
2422 if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" ||
2423 IntKind == "ABORT")
2424 LROffset = 4;
2425 else if (IntKind == "SWI" || IntKind == "UNDEF")
2426 LROffset = 0;
2427 else
2428 report_fatal_error("Unsupported interrupt attribute. If present, value "
2429 "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF");
2430
2431 RetOps.insert(RetOps.begin() + 1,
2432 DAG.getConstant(LROffset, DL, MVT::i32, false));
2433
2434 return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, RetOps);
2435}
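// Illustrative sketch (an assumption, not part of the original source): a
// handler declared as
//   __attribute__((interrupt("IRQ"))) void isr(void) { ... }
// returns with "subs pc, lr, #4", while an "SWI" or "UNDEF" handler returns
// with "subs pc, lr, #0", matching the LROffset values chosen above.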
2436
2437SDValue
2438ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2439 bool isVarArg,
2440 const SmallVectorImpl<ISD::OutputArg> &Outs,
2441 const SmallVectorImpl<SDValue> &OutVals,
2442 const SDLoc &dl, SelectionDAG &DAG) const {
2443
2444 // CCValAssign - represent the assignment of the return value to a location.
2445 SmallVector<CCValAssign, 16> RVLocs;
2446
2447 // CCState - Info about the registers and stack slots.
2448 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
2449 *DAG.getContext());
2450
2451 // Analyze outgoing return values.
2452 CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
2453
2454 SDValue Flag;
2455 SmallVector<SDValue, 4> RetOps;
2456 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2457 bool isLittleEndian = Subtarget->isLittle();
2458
2459 MachineFunction &MF = DAG.getMachineFunction();
2460 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2461 AFI->setReturnRegsCount(RVLocs.size());
2462
2463 // Copy the result values into the output registers.
2464 for (unsigned i = 0, realRVLocIdx = 0;
2465 i != RVLocs.size();
2466 ++i, ++realRVLocIdx) {
2467 CCValAssign &VA = RVLocs[i];
2468 assert(VA.isRegLoc() && "Can only return in registers!");
2469
2470 SDValue Arg = OutVals[realRVLocIdx];
2471
2472 switch (VA.getLocInfo()) {
2473 default: llvm_unreachable("Unknown loc info!");
2474 case CCValAssign::Full: break;
2475 case CCValAssign::BCvt:
2476 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
2477 break;
2478 }
2479
2480 if (VA.needsCustom()) {
2481 if (VA.getLocVT() == MVT::v2f64) {
2482 // Extract the first half and return it in two registers.
2483 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2484 DAG.getConstant(0, dl, MVT::i32));
2485 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
2486 DAG.getVTList(MVT::i32, MVT::i32), Half);
2487
2488 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2489 HalfGPRs.getValue(isLittleEndian ? 0 : 1),
2490 Flag);
2491 Flag = Chain.getValue(1);
2492 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2493 VA = RVLocs[++i]; // skip ahead to next loc
2494 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2495 HalfGPRs.getValue(isLittleEndian ? 1 : 0),
2496 Flag);
2497 Flag = Chain.getValue(1);
2498 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2499 VA = RVLocs[++i]; // skip ahead to next loc
2500
2501 // Extract the 2nd half and fall through to handle it as an f64 value.
2502 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
2503 DAG.getConstant(1, dl, MVT::i32));
2504 }
2505 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
2506 // available.
2507 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
2508 DAG.getVTList(MVT::i32, MVT::i32), Arg);
2509 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2510 fmrrd.getValue(isLittleEndian ? 0 : 1),
2511 Flag);
2512 Flag = Chain.getValue(1);
2513 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2514 VA = RVLocs[++i]; // skip ahead to next loc
2515 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
2516 fmrrd.getValue(isLittleEndian ? 1 : 0),
2517 Flag);
2518 } else
2519 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
2520
2521 // Guarantee that all emitted copies are glued together so that nothing
2522 // can be scheduled in between them.
2523 Flag = Chain.getValue(1);
2524 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
2525 }
2526 const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
2527 const MCPhysReg *I =
2528 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2529 if (I) {
2530 for (; *I; ++I) {
2531 if (ARM::GPRRegClass.contains(*I))
2532 RetOps.push_back(DAG.getRegister(*I, MVT::i32));
2533 else if (ARM::DPRRegClass.contains(*I))
2534 RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
2535 else
2536 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
2537 }
2538 }
2539
2540 // Update chain and glue.
2541 RetOps[0] = Chain;
2542 if (Flag.getNode())
2543 RetOps.push_back(Flag);
2544
2545 // CPUs which aren't M-class use a special sequence to return from
2546 // exceptions (roughly, any instruction setting pc and cpsr simultaneously,
2547 // though we use "subs pc, lr, #N").
2548 //
2549 // M-class CPUs actually use a normal return sequence with a special
2550 // (hardware-provided) value in LR, so the normal code path works.
2551 if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") &&
2552 !Subtarget->isMClass()) {
2553 if (Subtarget->isThumb1Only())
2554 report_fatal_error("interrupt attribute is not supported in Thumb1");
2555 return LowerInterruptReturn(RetOps, dl, DAG);
2556 }
2557
2558 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps);
2559}
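
The interrupt return path selected above is driven entirely by the string function attribute checked in LowerReturn. A minimal sketch of attaching that attribute from C++ (the helper name is illustrative, not from this file):

  #include "llvm/IR/Function.h"

  // Hypothetical helper: a function marked this way is lowered through
  // LowerInterruptReturn above; the "IRQ" kind selects an LR offset of 4.
  static void markAsIrqHandler(llvm::Function &F) {
    F.addFnAttr("interrupt", "IRQ"); // any of IRQ, FIQ, SWI, ABORT, UNDEF (or empty)
  }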
2560
2561bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2562 if (N->getNumValues() != 1)
2563 return false;
2564 if (!N->hasNUsesOfValue(1, 0))
2565 return false;
2566
2567 SDValue TCChain = Chain;
2568 SDNode *Copy = *N->use_begin();
2569 if (Copy->getOpcode() == ISD::CopyToReg) {
2570 // If the copy has a glue operand, we conservatively assume it isn't safe to
2571 // perform a tail call.
2572 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2573 return false;
2574 TCChain = Copy->getOperand(0);
2575 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) {
2576 SDNode *VMov = Copy;
2577 // f64 returned in a pair of GPRs.
2578 SmallPtrSet<SDNode*, 2> Copies;
2579 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2580 UI != UE; ++UI) {
2581 if (UI->getOpcode() != ISD::CopyToReg)
2582 return false;
2583 Copies.insert(*UI);
2584 }
2585 if (Copies.size() > 2)
2586 return false;
2587
2588 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end();
2589 UI != UE; ++UI) {
2590 SDValue UseChain = UI->getOperand(0);
2591 if (Copies.count(UseChain.getNode()))
2592 // Second CopyToReg
2593 Copy = *UI;
2594 else {
2595 // We are at the top of this chain.
2596 // If the copy has a glue operand, we conservatively assume it
2597 // isn't safe to perform a tail call.
2598 if (UI->getOperand(UI->getNumOperands()-1).getValueType() == MVT::Glue)
2599 return false;
2600 // First CopyToReg
2601 TCChain = UseChain;
2602 }
2603 }
2604 } else if (Copy->getOpcode() == ISD::BITCAST) {
2605 // f32 returned in a single GPR.
2606 if (!Copy->hasOneUse())
2607 return false;
2608 Copy = *Copy->use_begin();
2609 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0))
2610 return false;
2611 // If the copy has a glue operand, we conservatively assume it isn't safe to
2612 // perform a tail call.
2613 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2614 return false;
2615 TCChain = Copy->getOperand(0);
2616 } else {
2617 return false;
2618 }
2619
2620 bool HasRet = false;
2621 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2622 UI != UE; ++UI) {
2623 if (UI->getOpcode() != ARMISD::RET_FLAG &&
2624 UI->getOpcode() != ARMISD::INTRET_FLAG)
2625 return false;
2626 HasRet = true;
2627 }
2628
2629 if (!HasRet)
2630 return false;
2631
2632 Chain = TCChain;
2633 return true;
2634}
2635
2636bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
2637 if (!Subtarget->supportsTailCall())
2638 return false;
2639
2640 auto Attr =
2641 CI->getParent()->getParent()->getFnAttribute("disable-tail-calls");
2642 if (!CI->isTailCall() || Attr.getValueAsString() == "true")
2643 return false;
2644
2645 return true;
2646}
2647
2648 // Trying to write a 64-bit value, so we need to split it into two 32-bit values
2649 // first and pass the low and high parts through.
2650static SDValue LowerWRITE_REGISTER(SDValue Op, SelectionDAG &DAG) {
2651 SDLoc DL(Op);
2652 SDValue WriteValue = Op->getOperand(2);
2653
2654 // This function is only supposed to be called for i64 type argument.
2655 assert(WriteValue.getValueType() == MVT::i64
2656        && "LowerWRITE_REGISTER called for non-i64 type argument.");
2657
2658 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2659 DAG.getConstant(0, DL, MVT::i32));
2660 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, WriteValue,
2661 DAG.getConstant(1, DL, MVT::i32));
2662 SDValue Ops[] = { Op->getOperand(0), Op->getOperand(1), Lo, Hi };
2663 return DAG.getNode(ISD::WRITE_REGISTER, DL, MVT::Other, Ops);
2664}
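
The two EXTRACT_ELEMENT nodes above simply split the 64-bit payload into its low and high 32-bit halves; a standalone sketch of the equivalent arithmetic (illustrative, not from this file):

  #include <cstdint>

  // Low/high split corresponding to element indices 0 and 1 above.
  static void splitWriteValue(uint64_t WriteVal, uint32_t &Lo, uint32_t &Hi) {
    Lo = static_cast<uint32_t>(WriteVal);       // EXTRACT_ELEMENT index 0
    Hi = static_cast<uint32_t>(WriteVal >> 32); // EXTRACT_ELEMENT index 1
  }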
2665
2666 // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
2667 // their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is
2668 // one of the above-mentioned nodes. It has to be wrapped because otherwise
2669 // Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only
2670 // be used to form an addressing mode. These wrapped nodes will be selected
2671 // into MOVi.
2672static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
2673 EVT PtrVT = Op.getValueType();
2674 // FIXME there is no actual debug info here
2675 SDLoc dl(Op);
2676 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2677 SDValue Res;
2678 if (CP->isMachineConstantPoolEntry())
2679 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2680 CP->getAlignment());
2681 else
2682 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2683 CP->getAlignment());
2684 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res);
2685}
2686
2687unsigned ARMTargetLowering::getJumpTableEncoding() const {
2688 return MachineJumpTableInfo::EK_Inline;
2689}
2690
2691SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
2692 SelectionDAG &DAG) const {
2693 MachineFunction &MF = DAG.getMachineFunction();
2694 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2695 unsigned ARMPCLabelIndex = 0;
2696 SDLoc DL(Op);
2697 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2698 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
2699 SDValue CPAddr;
2700 bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
2701 if (!IsPositionIndependent) {
2702 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
2703 } else {
2704 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
2705 ARMPCLabelIndex = AFI->createPICLabelUId();
2706 ARMConstantPoolValue *CPV =
2707 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex,
2708 ARMCP::CPBlockAddress, PCAdj);
2709 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2710 }
2711 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr);
2712 SDValue Result = DAG.getLoad(
2713 PtrVT, DL, DAG.getEntryNode(), CPAddr,
2714 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2715 if (!IsPositionIndependent)
2716 return Result;
2717 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, DL, MVT::i32);
2718 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
2719}
2720
2721/// \brief Convert a TLS address reference into the correct sequence of loads
2722/// and calls to compute the variable's address for Darwin, and return an
2723/// SDValue containing the final node.
2724
2725/// Darwin only has one TLS scheme which must be capable of dealing with the
2726/// fully general situation, in the worst case. This means:
2727/// + "extern __thread" declaration.
2728/// + Defined in a possibly unknown dynamic library.
2729///
2730/// The general system is that each __thread variable has a [3 x i32] descriptor
2731/// which contains information used by the runtime to calculate the address. The
2732/// only part of this the compiler needs to know about is the first word, which
2733/// contains a function pointer that must be called with the address of the
2734/// entire descriptor in "r0".
2735///
2736/// Since this descriptor may be in a different unit, in general access must
2737/// proceed along the usual ARM rules. A common sequence to produce is:
2738///
2739/// movw rT1, :lower16:_var$non_lazy_ptr
2740/// movt rT1, :upper16:_var$non_lazy_ptr
2741/// ldr r0, [rT1]
2742/// ldr rT2, [r0]
2743/// blx rT2
2744/// [...address now in r0...]
2745SDValue
2746ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
2747 SelectionDAG &DAG) const {
2748 assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
2749 SDLoc DL(Op);
2750
2751 // The first step is to get the address of the actual global symbol. This is
2752 // where the TLS descriptor lives.
2753 SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
2754
2755 // The first entry in the descriptor is a function pointer that we must call
2756 // to obtain the address of the variable.
2757 SDValue Chain = DAG.getEntryNode();
2758 SDValue FuncTLVGet = DAG.getLoad(
2759 MVT::i32, DL, Chain, DescAddr,
2760 MachinePointerInfo::getGOT(DAG.getMachineFunction()),
2761 /* Alignment = */ 4,
2762 MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
2763 MachineMemOperand::MOInvariant);
2764 Chain = FuncTLVGet.getValue(1);
2765
2766 MachineFunction &F = DAG.getMachineFunction();
2767 MachineFrameInfo &MFI = F.getFrameInfo();
2768 MFI.setAdjustsStack(true);
2769
2770 // TLS calls preserve all registers except those that absolutely must be
2771 // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
2772 // silly).
2773 auto TRI =
2774 getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
2775 auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
2776 const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
2777
2778 // Finally, we can make the call. This is just a degenerate version of a
2779 // normal ARM call node: r0 takes the address of the descriptor, and
2780 // returns the address of the variable in this thread.
2781 Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
2782 Chain =
2783 DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
2784 Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
2785 DAG.getRegisterMask(Mask), Chain.getValue(1));
2786 return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
2787}
2788
2789SDValue
2790ARMTargetLowering::LowerGlobalTLSAddressWindows(SDValue Op,
2791 SelectionDAG &DAG) const {
2792 assert(Subtarget->isTargetWindows() && "Windows specific TLS lowering");
2793
2794 SDValue Chain = DAG.getEntryNode();
2795 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2796 SDLoc DL(Op);
2797
2798 // Load the current TEB (thread environment block)
2799 SDValue Ops[] = {Chain,
2800 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
2801 DAG.getConstant(15, DL, MVT::i32),
2802 DAG.getConstant(0, DL, MVT::i32),
2803 DAG.getConstant(13, DL, MVT::i32),
2804 DAG.getConstant(0, DL, MVT::i32),
2805 DAG.getConstant(2, DL, MVT::i32)};
2806 SDValue CurrentTEB = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
2807 DAG.getVTList(MVT::i32, MVT::Other), Ops);
2808
2809 SDValue TEB = CurrentTEB.getValue(0);
2810 Chain = CurrentTEB.getValue(1);
2811
2812 // Load the ThreadLocalStoragePointer from the TEB
2813 // A pointer to the TLS array is located at offset 0x2c from the TEB.
2814 SDValue TLSArray =
2815 DAG.getNode(ISD::ADD, DL, PtrVT, TEB, DAG.getIntPtrConstant(0x2c, DL));
2816 TLSArray = DAG.getLoad(PtrVT, DL, Chain, TLSArray, MachinePointerInfo());
2817
2818 // The pointer to the thread's TLS data area is found at an offset of
2819 // (TLS index * 4) into the TLSArray.
2820
2821 // Load the TLS index from the C runtime
2822 SDValue TLSIndex =
2823 DAG.getTargetExternalSymbol("_tls_index", PtrVT, ARMII::MO_NO_FLAG);
2824 TLSIndex = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, TLSIndex);
2825 TLSIndex = DAG.getLoad(PtrVT, DL, Chain, TLSIndex, MachinePointerInfo());
2826
2827 SDValue Slot = DAG.getNode(ISD::SHL, DL, PtrVT, TLSIndex,
2828 DAG.getConstant(2, DL, MVT::i32));
2829 SDValue TLS = DAG.getLoad(PtrVT, DL, Chain,
2830 DAG.getNode(ISD::ADD, DL, PtrVT, TLSArray, Slot),
2831 MachinePointerInfo());
2832
2833 // Get the offset of the start of the .tls section (section base)
2834 const auto *GA = cast<GlobalAddressSDNode>(Op);
2835 auto *CPV = ARMConstantPoolConstant::Create(GA->getGlobal(), ARMCP::SECREL);
2836 SDValue Offset = DAG.getLoad(
2837 PtrVT, DL, Chain, DAG.getNode(ARMISD::Wrapper, DL, MVT::i32,
2838 DAG.getTargetConstantPool(CPV, PtrVT, 4)),
2839 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2840
2841 return DAG.getNode(ISD::ADD, DL, PtrVT, TLS, Offset);
2842}
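
The address computation built above reduces, in rough C terms, to the sketch below (illustrative, not from this file; the TEB itself is read via the MRC intrinsic and the SECREL offset comes from the constant-pool entry):

  #include <cstdint>

  // Assumes 32-bit pointers, so TLSArray[TlsIndex] matches the SHL-by-2 above.
  static char *windowsTlsAddress(char *TEB, uint32_t TlsIndex,
                                 uint32_t SecRelOffset) {
    char **TLSArray = *reinterpret_cast<char ***>(TEB + 0x2c); // ThreadLocalStoragePointer
    char *ThreadTLS = TLSArray[TlsIndex];                      // per-thread TLS block
    return ThreadTLS + SecRelOffset;                           // variable's address
  }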
2843
2844// Lower ISD::GlobalTLSAddress using the "general dynamic" model
2845SDValue
2846ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
2847 SelectionDAG &DAG) const {
2848 SDLoc dl(GA);
2849 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2850 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2851 MachineFunction &MF = DAG.getMachineFunction();
2852 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2853 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2854 ARMConstantPoolValue *CPV =
2855 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2856 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true);
2857 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2858 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument);
2859 Argument = DAG.getLoad(
2860 PtrVT, dl, DAG.getEntryNode(), Argument,
2861 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2862 SDValue Chain = Argument.getValue(1);
2863
2864 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2865 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
2866
2867 // call __tls_get_addr.
2868 ArgListTy Args;
2869 ArgListEntry Entry;
2870 Entry.Node = Argument;
2871 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext());
2872 Args.push_back(Entry);
2873
2874 // FIXME: is there useful debug info available here?
2875 TargetLowering::CallLoweringInfo CLI(DAG);
2876 CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
2877 CallingConv::C, Type::getInt32Ty(*DAG.getContext()),
2878 DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args));
2879
2880 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
2881 return CallResult.first;
2882}
2883
2884// Lower ISD::GlobalTLSAddress using the "initial exec" or
2885// "local exec" model.
2886SDValue
2887ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
2888 SelectionDAG &DAG,
2889 TLSModel::Model model) const {
2890 const GlobalValue *GV = GA->getGlobal();
2891 SDLoc dl(GA);
2892 SDValue Offset;
2893 SDValue Chain = DAG.getEntryNode();
2894 EVT PtrVT = getPointerTy(DAG.getDataLayout());
2895 // Get the Thread Pointer
2896 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
2897
2898 if (model == TLSModel::InitialExec) {
2899 MachineFunction &MF = DAG.getMachineFunction();
2900 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2901 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
2902 // Initial exec model.
2903 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
2904 ARMConstantPoolValue *CPV =
2905 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex,
2906 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF,
2907 true);
2908 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2909 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2910 Offset = DAG.getLoad(
2911 PtrVT, dl, Chain, Offset,
2912 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2913 Chain = Offset.getValue(1);
2914
2915 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
2916 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
2917
2918 Offset = DAG.getLoad(
2919 PtrVT, dl, Chain, Offset,
2920 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2921 } else {
2922 // local exec model
2923 assert(model == TLSModel::LocalExec);
2924 ARMConstantPoolValue *CPV =
2925 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF);
2926 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4);
2927 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset);
2928 Offset = DAG.getLoad(
2929 PtrVT, dl, Chain, Offset,
2930 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
2931 }
2932
2933 // The address of the thread local variable is the add of the thread
2934 // pointer with the offset of the variable.
2935 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
2936}
2937
2938SDValue
2939ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
2940 if (Subtarget->isTargetDarwin())
2941 return LowerGlobalTLSAddressDarwin(Op, DAG);
2942
2943 if (Subtarget->isTargetWindows())
2944 return LowerGlobalTLSAddressWindows(Op, DAG);
2945
2946 // TODO: implement the "local dynamic" model
2947 assert(Subtarget->isTargetELF() && "Only ELF implemented here");
2948 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
2949 if (DAG.getTarget().Options.EmulatedTLS)
2950 return LowerToTLSEmulatedModel(GA, DAG);
2951
2952 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal());
2953
2954 switch (model) {
2955 case TLSModel::GeneralDynamic:
2956 case TLSModel::LocalDynamic:
2957 return LowerToTLSGeneralDynamicModel(GA, DAG);
2958 case TLSModel::InitialExec:
2959 case TLSModel::LocalExec:
2960 return LowerToTLSExecModels(GA, DAG, model);
2961 }
2962 llvm_unreachable("bogus TLS model");
2963}
2964
2965/// Return true if all users of V are within function F, looking through
2966/// ConstantExprs.
2967static bool allUsersAreInFunction(const Value *V, const Function *F) {
2968 SmallVector<const User*,4> Worklist;
2969 for (auto *U : V->users())
2970 Worklist.push_back(U);
2971 while (!Worklist.empty()) {
2972 auto *U = Worklist.pop_back_val();
2973 if (isa<ConstantExpr>(U)) {
2974 for (auto *UU : U->users())
2975 Worklist.push_back(UU);
2976 continue;
2977 }
2978
2979 auto *I = dyn_cast<Instruction>(U);
2980 if (!I || I->getParent()->getParent() != F)
2981 return false;
2982 }
2983 return true;
2984}
2985
2986 /// Return true if all users of V are within some (any) function, looking through
2987 /// ConstantExprs. In other words, return false if V has any global constant users.
2988static bool allUsersAreInFunctions(const Value *V) {
2989 SmallVector<const User*,4> Worklist;
2990 for (auto *U : V->users())
2991 Worklist.push_back(U);
2992 while (!Worklist.empty()) {
2993 auto *U = Worklist.pop_back_val();
2994 if (isa<ConstantExpr>(U)) {
2995 for (auto *UU : U->users())
2996 Worklist.push_back(UU);
2997 continue;
2998 }
2999
3000 if (!isa<Instruction>(U))
3001 return false;
3002 }
3003 return true;
3004}
3005
3006// Return true if T is an integer, float or an array/vector of either.
3007static bool isSimpleType(Type *T) {
3008 if (T->isIntegerTy() || T->isFloatingPointTy())
3009 return true;
3010 Type *SubT = nullptr;
3011 if (T->isArrayTy())
3012 SubT = T->getArrayElementType();
3013 else if (T->isVectorTy())
3014 SubT = T->getVectorElementType();
3015 else
3016 return false;
3017 return SubT->isIntegerTy() || SubT->isFloatingPointTy();
3018}
3019
3020static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
3021 EVT PtrVT, const SDLoc &dl) {
3022 // If we're creating a pool entry for a constant global with unnamed address,
3023 // and the global is small enough, we can emit it inline into the constant pool
3024 // to save ourselves an indirection.
3025 //
3026 // This is a win if the constant is only used in one function (so it doesn't
3027 // need to be duplicated) or duplicating the constant wouldn't increase code
3028 // size (implying the constant is no larger than 4 bytes).
3029 const Function *F = DAG.getMachineFunction().getFunction();
3030
3031 // We rely on this decision to inline being idempotent and unrelated to the
3032 // use-site. We know that if we inline a variable at one use site, we'll
3033 // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
3034 // doesn't know about this optimization, so bail out if it's enabled;
3035 // otherwise we could decide to inline here (and thus never emit the GV) while
3036 // fast-isel generated code still requires the GV.
3037 if (!EnableConstpoolPromotion ||
3038 DAG.getMachineFunction().getTarget().Options.EnableFastISel)
3039 return SDValue();
3040
3041 auto *GVar = dyn_cast<GlobalVariable>(GV);
3042 if (!GVar || !GVar->hasInitializer() ||
3043 !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
3044 !GVar->hasLocalLinkage())
3045 return SDValue();
3046
3047 // Ensure that we don't try and inline any type that contains pointers. If
3048 // we inline a value that contains relocations, we move the relocations from
3049 // .data to .text which is not ideal.
3050 auto *Init = GVar->getInitializer();
3051 if (!isSimpleType(Init->getType()))
3052 return SDValue();
3053
3054 // The constant islands pass can only really deal with alignment requests
3055 // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
3056 // any type with an alignment requirement greater than 4 bytes. We also
3057 // can only promote constants that are multiples of 4 bytes in size, or
3058 // that are paddable to a multiple of 4. Currently we only try to pad
3059 // constants that are strings, for simplicity.
3060 auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
3061 unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
3062 unsigned Align = GVar->getAlignment();
3063 unsigned RequiredPadding = 4 - (Size % 4);
3064 bool PaddingPossible =
3065 RequiredPadding == 4 || (CDAInit && CDAInit->isString());
3066 if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
3067 return SDValue();
3068
3069 unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
3070 MachineFunction &MF = DAG.getMachineFunction();
3071 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3072
3073 // We can't bloat the constant pool too much, else the ConstantIslands pass
3074 // may fail to converge. If we haven't promoted this global yet (it may have
3075 // multiple uses), and promoting it would increase the constant pool size (Sz
3076 // > 4), ensure we have space to do so up to MaxTotal.
3077 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
3078 if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
3079 ConstpoolPromotionMaxTotal)
3080 return SDValue();
3081
3082 // This is only valid if all users are in a single function OR it has users
3083 // in multiple functions but it is no larger than a pointer. We also check if
3084 // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
3085 // address taken.
3086 if (!allUsersAreInFunction(GVar, F) &&
3087 !(Size <= 4 && allUsersAreInFunctions(GVar)))
3088 return SDValue();
3089
3090 // We're going to inline this global. Pad it out if needed.
3091 if (RequiredPadding != 4) {
3092 StringRef S = CDAInit->getAsString();
3093
3094 SmallVector<uint8_t,16> V(S.size());
3095 std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
3096 while (RequiredPadding--)
3097 V.push_back(0);
3098 Init = ConstantDataArray::get(*DAG.getContext(), V);
3099 }
3100
3101 auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
3102 SDValue CPAddr =
3103 DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
3104 if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
3105 AFI->markGlobalAsPromotedToConstantPool(GVar);
3106 AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
3107 PaddedSize - 4);
3108 }
3109 ++NumConstpoolPromoted;
3110 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3111}
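
As a worked instance of the padding arithmetic above (values are illustrative): a 10-byte string initializer gives Size = 10, RequiredPadding = 2 and PaddedSize = 12, so two zero bytes are appended before the constant is emitted.

  // Illustrative sketch only, mirroring the computation above.
  static unsigned paddedConstpoolSize(unsigned Size) { // e.g. Size = 10 for [10 x i8]
    unsigned RequiredPadding = 4 - (Size % 4);         // 2 when Size = 10
    return Size + ((RequiredPadding == 4) ? 0 : RequiredPadding); // 12 when Size = 10
  }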
3112
3113static bool isReadOnly(const GlobalValue *GV) {
3114 if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
3115 GV = GA->getBaseObject();
3116 return (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
3117 isa<Function>(GV);
3118}
3119
3120SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
3121 SelectionDAG &DAG) const {
3122 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3123 SDLoc dl(Op);
3124 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3125 const TargetMachine &TM = getTargetMachine();
3126 bool IsRO = isReadOnly(GV);
3127
3128 // Call promoteToConstantPool only if we are not generating an execute-only (XO) text section.
3129 if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
3130 if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
3131 return V;
3132
3133 if (isPositionIndependent()) {
3134 bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
3135
3136 MachineFunction &MF = DAG.getMachineFunction();
3137 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3138 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3139 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3140 SDLoc dl(Op);
3141 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
3142 ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(
3143 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj,
3144 UseGOT_PREL ? ARMCP::GOT_PREL : ARMCP::no_modifier,
3145 /*AddCurrentAddress=*/UseGOT_PREL);
3146 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3147 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3148 SDValue Result = DAG.getLoad(
3149 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3150 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3151 SDValue Chain = Result.getValue(1);
3152 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3153 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3154 if (UseGOT_PREL)
3155 Result =
3156 DAG.getLoad(PtrVT, dl, Chain, Result,
3157 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3158 return Result;
3159 } else if (Subtarget->isROPI() && IsRO) {
3160 // PC-relative.
3161 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
3162 SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
3163 return Result;
3164 } else if (Subtarget->isRWPI() && !IsRO) {
3165 // SB-relative.
3166 SDValue RelAddr;
3167 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3168 ++NumMovwMovt;
3169 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_SBREL);
3170 RelAddr = DAG.getNode(ARMISD::Wrapper, dl, PtrVT, G);
3171 } else { // use literal pool for address constant
3172 ARMConstantPoolValue *CPV =
3173 ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
3174 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3175 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3176 RelAddr = DAG.getLoad(
3177 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3178 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3179 }
3180 SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
3181 SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, RelAddr);
3182 return Result;
3183 }
3184
3185 // If we have T2 ops, we can materialize the address directly via movt/movw
3186 // pair. This is always cheaper.
3187 if (Subtarget->useMovt(DAG.getMachineFunction())) {
3188 ++NumMovwMovt;
3189 // FIXME: Once remat is capable of dealing with instructions with register
3190 // operands, expand this into two nodes.
3191 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT,
3192 DAG.getTargetGlobalAddress(GV, dl, PtrVT));
3193 } else {
3194 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
3195 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3196 return DAG.getLoad(
3197 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3198 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3199 }
3200}
3201
3202SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
3203 SelectionDAG &DAG) const {
3204 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3205        "ROPI/RWPI not currently supported for Darwin");
3206 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3207 SDLoc dl(Op);
3208 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3209
3210 if (Subtarget->useMovt(DAG.getMachineFunction()))
3211 ++NumMovwMovt;
3212
3213 // FIXME: Once remat is capable of dealing with instructions with register
3214 // operands, expand this into multiple nodes
3215 unsigned Wrapper =
3216 isPositionIndependent() ? ARMISD::WrapperPIC : ARMISD::Wrapper;
3217
3218 SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, ARMII::MO_NONLAZY);
3219 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, G);
3220
3221 if (Subtarget->isGVIndirectSymbol(GV))
3222 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
3223 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3224 return Result;
3225}
3226
3227SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
3228 SelectionDAG &DAG) const {
3229 assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
3230 assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
3231        "Windows on ARM expects to use movw/movt");
3232 assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
3233        "ROPI/RWPI not currently supported for Windows");
3234
3235 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
3236 const ARMII::TOF TargetFlags =
3237 (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG);
3238 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3239 SDValue Result;
3240 SDLoc DL(Op);
3241
3242 ++NumMovwMovt;
3243
3244 // FIXME: Once remat is capable of dealing with instructions with register
3245 // operands, expand this into two nodes.
3246 Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT,
3247 DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0,
3248 TargetFlags));
3249 if (GV->hasDLLImportStorageClass())
3250 Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result,
3251 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
3252 return Result;
3253}
3254
3255SDValue
3256ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const {
3257 SDLoc dl(Op);
3258 SDValue Val = DAG.getConstant(0, dl, MVT::i32);
3259 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl,
3260 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
3261 Op.getOperand(1), Val);
3262}
3263
3264SDValue
3265ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const {
3266 SDLoc dl(Op);
3267 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0),
3268 Op.getOperand(1), DAG.getConstant(0, dl, MVT::i32));
3269}
3270
3271SDValue ARMTargetLowering::LowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
3272 SelectionDAG &DAG) const {
3273 SDLoc dl(Op);
3274 return DAG.getNode(ARMISD::EH_SJLJ_SETUP_DISPATCH, dl, MVT::Other,
3275 Op.getOperand(0));
3276}
3277
3278SDValue
3279ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG,
3280 const ARMSubtarget *Subtarget) const {
3281 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
3282 SDLoc dl(Op);
3283 switch (IntNo) {
3284 default: return SDValue(); // Don't custom lower most intrinsics.
3285 case Intrinsic::thread_pointer: {
3286 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3287 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
3288 }
3289 case Intrinsic::eh_sjlj_lsda: {
3290 MachineFunction &MF = DAG.getMachineFunction();
3291 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3292 unsigned ARMPCLabelIndex = AFI->createPICLabelUId();
3293 EVT PtrVT = getPointerTy(DAG.getDataLayout());
3294 SDValue CPAddr;
3295 bool IsPositionIndependent = isPositionIndependent();
3296 unsigned PCAdj = IsPositionIndependent ? (Subtarget->isThumb() ? 4 : 8) : 0;
3297 ARMConstantPoolValue *CPV =
3298 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex,
3299 ARMCP::CPLSDA, PCAdj);
3300 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
3301 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
3302 SDValue Result = DAG.getLoad(
3303 PtrVT, dl, DAG.getEntryNode(), CPAddr,
3304 MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
3305
3306 if (IsPositionIndependent) {
3307 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, dl, MVT::i32);
3308 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
3309 }
3310 return Result;
3311 }
3312 case Intrinsic::arm_neon_vmulls:
3313 case Intrinsic::arm_neon_vmullu: {
3314 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls)
3315 ? ARMISD::VMULLs : ARMISD::VMULLu;
3316 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3317 Op.getOperand(1), Op.getOperand(2));
3318 }
3319 case Intrinsic::arm_neon_vminnm:
3320 case Intrinsic::arm_neon_vmaxnm: {
3321 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminnm)
3322 ? ISD::FMINNUM : ISD::FMAXNUM;
3323 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3324 Op.getOperand(1), Op.getOperand(2));
3325 }
3326 case Intrinsic::arm_neon_vminu:
3327 case Intrinsic::arm_neon_vmaxu: {
3328 if (Op.getValueType().isFloatingPoint())
3329 return SDValue();
3330 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vminu)
3331 ? ISD::UMIN : ISD::UMAX;
3332 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3333 Op.getOperand(1), Op.getOperand(2));
3334 }
3335 case Intrinsic::arm_neon_vmins:
3336 case Intrinsic::arm_neon_vmaxs: {
3337 // v{min,max}s is overloaded between signed integers and floats.
3338 if (!Op.getValueType().isFloatingPoint()) {
3339 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3340 ? ISD::SMIN : ISD::SMAX;
3341 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3342 Op.getOperand(1), Op.getOperand(2));
3343 }
3344 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmins)
3345 ? ISD::FMINNAN : ISD::FMAXNAN;
3346 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(),
3347 Op.getOperand(1), Op.getOperand(2));
3348 }
3349 }
3350}
3351
3352static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG,
3353 const ARMSubtarget *Subtarget) {
3354 // FIXME: handle "fence singlethread" more efficiently.
3355 SDLoc dl(Op);
3356 if (!Subtarget->hasDataBarrier()) {
3357 // Some ARMv6 cpus can support data barriers with an mcr instruction.
3358 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get
3359 // here.
3360 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() &&
3361        "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!");
3362 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0),
3363 DAG.getConstant(0, dl, MVT::i32));
3364 }
3365
3366 ConstantSDNode *OrdN = cast<ConstantSDNode>(Op.getOperand(1));
3367 AtomicOrdering Ord = static_cast<AtomicOrdering>(OrdN->getZExtValue());
3368 ARM_MB::MemBOpt Domain = ARM_MB::ISH;
3369 if (Subtarget->isMClass()) {
3370 // Only a full system barrier exists in the M-class architectures.
3371 Domain = ARM_MB::SY;
3372 } else if (Subtarget->preferISHSTBarriers() &&
3373 Ord == AtomicOrdering::Release) {
3374 // Swift happens to implement ISHST barriers in a way that's compatible with
3375 // Release semantics but weaker than ISH so we'd be fools not to use
3376 // it. Beware: other processors probably don't!
3377 Domain = ARM_MB::ISHST;
3378 }
3379
3380 return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0),
3381 DAG.getConstant(Intrinsic::arm_dmb, dl, MVT::i32),
3382 DAG.getConstant(Domain, dl, MVT::i32));
3383}
3384
3385static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG,
3386 const ARMSubtarget *Subtarget) {
3387 // ARM prior to v5TE and Thumb1 do not have preload instructions.
3388 if (!(Subtarget->isThumb2() ||
3389 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps())))
3390 // Just preserve the chain.
3391 return Op.getOperand(0);
3392
3393 SDLoc dl(Op);
3394 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1;
3395 if (!isRead &&
3396 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension()))
3397 // ARMv7 with MP extension has PLDW.
3398 return Op.getOperand(0);
3399
3400 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue();
3401 if (Subtarget->isThumb()) {
3402 // Invert the bits.
3403 isRead = ~isRead & 1;
3404 isData = ~isData & 1;
3405 }
3406
3407 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0),
3408 Op.getOperand(1), DAG.getConstant(isRead, dl, MVT::i32),
3409 DAG.getConstant(isData, dl, MVT::i32));
3410}
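
The flag handling above performs the same masked complement twice on Thumb; a standalone sketch of that bit manipulation (illustrative, not from this file):

  #include <cstdint>

  // Operand 2 of the PREFETCH node is the read/write flag (1 = write).
  static uint32_t preloadIsReadFlag(uint32_t RWFlag, bool IsThumb) {
    uint32_t IsRead = ~RWFlag & 1; // reads use PLD, writes use PLDW
    if (IsThumb)
      IsRead = ~IsRead & 1;        // Thumb encodings invert the bit, as above
    return IsRead;
  }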
3411
3412static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) {
3413 MachineFunction &MF = DAG.getMachineFunction();
3414 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>();
3415
3416 // vastart just stores the address of the VarArgsFrameIndex slot into the
3417 // memory location argument.
3418 SDLoc dl(Op);
3419 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
3420 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3421 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3422 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3423 MachinePointerInfo(SV));
3424}
3425
3426SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
3427 CCValAssign &NextVA,
3428 SDValue &Root,
3429 SelectionDAG &DAG,
3430 const SDLoc &dl) const {
3431 MachineFunction &MF = DAG.getMachineFunction();
3432 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3433
3434 const TargetRegisterClass *RC;
3435 if (AFI->isThumb1OnlyFunction())
3436 RC = &ARM::tGPRRegClass;
3437 else
3438 RC = &ARM::GPRRegClass;
3439
3440 // Transform the arguments stored in physical registers into virtual ones.
3441 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3442 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3443
3444 SDValue ArgValue2;
3445 if (NextVA.isMemLoc()) {
3446 MachineFrameInfo &MFI = MF.getFrameInfo();
3447 int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
3448
3449 // Create load node to retrieve arguments from the stack.
3450 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
3451 ArgValue2 = DAG.getLoad(
3452 MVT::i32, dl, Root, FIN,
3453 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3454 } else {
3455 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
3456 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
3457 }
3458 if (!Subtarget->isLittle())
3459 std::swap (ArgValue, ArgValue2);
3460 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
3461}
3462
3463// The remaining GPRs hold either the beginning of variable-argument
3464// data, or the beginning of an aggregate passed by value (usually
3465// byval). Either way, we allocate stack slots adjacent to the data
3466// provided by our caller, and store the unallocated registers there.
3467// If this is a variadic function, the va_list pointer will begin with
3468// these values; otherwise, this reassembles a (byval) structure that
3469// was split between registers and memory.
3470// Return: The frame index registers were stored into.
3471int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
3472 const SDLoc &dl, SDValue &Chain,
3473 const Value *OrigArg,
3474 unsigned InRegsParamRecordIdx,
3475 int ArgOffset, unsigned ArgSize) const {
3476 // Currently, two use-cases are possible:
3477 // Case #1. Non-var-args function, and we meet the first byval parameter.
3478 // Set up the first unallocated register as the first byval register;
3479 // eat all remaining registers
3480 // (these two actions are performed by the HandleByVal method).
3481 // Then, here, we initialize the stack frame with
3482 // "store-reg" instructions.
3483 // Case #2. Var-args function that doesn't contain byval parameters.
3484 // The same: eat all remaining unallocated registers,
3485 // and initialize the stack frame.
3486
3487 MachineFunction &MF = DAG.getMachineFunction();
3488 MachineFrameInfo &MFI = MF.getFrameInfo();
3489 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3490 unsigned RBegin, REnd;
3491 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
3492 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd);
3493 } else {
3494 unsigned RBeginIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3495 RBegin = RBeginIdx == 4 ? (unsigned)ARM::R4 : GPRArgRegs[RBeginIdx];
3496 REnd = ARM::R4;
3497 }
3498
3499 if (REnd != RBegin)
3500 ArgOffset = -4 * (ARM::R4 - RBegin);
3501
3502 auto PtrVT = getPointerTy(DAG.getDataLayout());
3503 int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
3504 SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
3505
3506 SmallVector<SDValue, 4> MemOps;
3507 const TargetRegisterClass *RC =
3508 AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
3509
3510 for (unsigned Reg = RBegin, i = 0; Reg < REnd; ++Reg, ++i) {
3511 unsigned VReg = MF.addLiveIn(Reg, RC);
3512 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
3513 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
3514 MachinePointerInfo(OrigArg, 4 * i));
3515 MemOps.push_back(Store);
3516 FIN = DAG.getNode(ISD::ADD, dl, PtrVT, FIN, DAG.getConstant(4, dl, PtrVT));
3517 }
3518
3519 if (!MemOps.empty())
3520 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
3521 return FrameIndex;
3522}
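
A worked instance of the offset computation above (illustrative): if the byval record covers r2-r3, then RBegin = R2 and REnd = R4, so ArgOffset becomes -4 * (R4 - R2) = -8 and the two registers are stored at offsets -8 and -4 relative to the incoming stack pointer.

  // Illustrative sketch only: 2 and 4 stand in for the ARM::R2 and ARM::R4 enums.
  static int byvalArgOffset(unsigned RBegin) {
    const unsigned R4 = 4;
    return -4 * (int)(R4 - RBegin); // RBegin = 2 (r2) gives -8
  }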
3523
3524 // Set up the stack frame that the va_list pointer will start from.
3525void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG,
3526 const SDLoc &dl, SDValue &Chain,
3527 unsigned ArgOffset,
3528 unsigned TotalArgRegsSaveSize,
3529 bool ForceMutable) const {
3530 MachineFunction &MF = DAG.getMachineFunction();
3531 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3532
3533 // Try to store any remaining integer argument regs
3534 // to their spots on the stack so that they may be loaded by dereferencing
3535 // the result of va_next.
3536 // If there are no regs to be stored, just point the address after the last
3537 // argument passed via the stack.
3538 int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, nullptr,
3539 CCInfo.getInRegsParamsCount(),
3540 CCInfo.getNextStackOffset(), 4);
3541 AFI->setVarArgsFrameIndex(FrameIndex);
3542}
3543
3544SDValue ARMTargetLowering::LowerFormalArguments(
3545 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3546 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3547 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3548 MachineFunction &MF = DAG.getMachineFunction();
3549 MachineFrameInfo &MFI = MF.getFrameInfo();
3550
3551 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
3552
3553 // Assign locations to all of the incoming arguments.
3554 SmallVector<CCValAssign, 16> ArgLocs;
3555 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
3556 *DAG.getContext());
3557 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
3558
3559 SmallVector<SDValue, 16> ArgValues;
3560 SDValue ArgValue;
3561 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin();
3562 unsigned CurArgIdx = 0;
3563
3564 // Initially ArgRegsSaveSize is zero.
3565 // Then we increase this value each time we meet a byval parameter.
3566 // We also increase this value in the case of a varargs function.
3567 AFI->setArgRegsSaveSize(0);
3568
3569 // Calculate the amount of stack space that we need to allocate to store
3570 // byval and variadic arguments that are passed in registers.
3571 // We need to know this before we allocate the first byval or variadic
3572 // argument, as they will be allocated a stack slot below the CFA (Canonical
3573 // Frame Address, the stack pointer at entry to the function).
3574 unsigned ArgRegBegin = ARM::R4;
3575 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3576 if (CCInfo.getInRegsParamsProcessed() >= CCInfo.getInRegsParamsCount())
3577 break;
3578
3579 CCValAssign &VA = ArgLocs[i];
3580 unsigned Index = VA.getValNo();
3581 ISD::ArgFlagsTy Flags = Ins[Index].Flags;
3582 if (!Flags.isByVal())
3583 continue;
3584
3585 assert(VA.isMemLoc() && "unexpected byval pointer in reg");
3586 unsigned RBegin, REnd;
3587 CCInfo.getInRegsParamInfo(CCInfo.getInRegsParamsProcessed(), RBegin, REnd);
3588 ArgRegBegin = std::min(ArgRegBegin, RBegin);
3589
3590 CCInfo.nextInRegsParam();
3591 }
3592 CCInfo.rewindByValRegsInfo();
3593
3594 int lastInsIndex = -1;
3595 if (isVarArg && MFI.hasVAStart()) {
3596 unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
3597 if (RegIdx != array_lengthof(GPRArgRegs))
3598 ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
3599 }
3600
3601 unsigned TotalArgRegsSaveSize = 4 * (ARM::R4 - ArgRegBegin);
3602 AFI->setArgRegsSaveSize(TotalArgRegsSaveSize);
3603 auto PtrVT = getPointerTy(DAG.getDataLayout());
3604
3605 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
3606 CCValAssign &VA = ArgLocs[i];
3607 if (Ins[VA.getValNo()].isOrigArg()) {
3608 std::advance(CurOrigArg,
3609 Ins[VA.getValNo()].getOrigArgIndex() - CurArgIdx);
3610 CurArgIdx = Ins[VA.getValNo()].getOrigArgIndex();
3611 }
3612 // Arguments stored in registers.
3613 if (VA.isRegLoc()) {
3614 EVT RegVT = VA.getLocVT();
3615
3616 if (VA.needsCustom()) {
3617 // f64 and vector types are split up into multiple registers or
3618 // combinations of registers and stack slots.
3619 if (VA.getLocVT() == MVT::v2f64) {
3620 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i],
3621 Chain, DAG, dl);
3622 VA = ArgLocs[++i]; // skip ahead to next loc
3623 SDValue ArgValue2;
3624 if (VA.isMemLoc()) {
3625 int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
3626 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3627 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
3628 MachinePointerInfo::getFixedStack(
3629 DAG.getMachineFunction(), FI));
3630 } else {
3631 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i],
3632 Chain, DAG, dl);
3633 }
3634 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
3635 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3636 ArgValue, ArgValue1,
3637 DAG.getIntPtrConstant(0, dl));
3638 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64,
3639 ArgValue, ArgValue2,
3640 DAG.getIntPtrConstant(1, dl));
3641 } else
3642 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl);
3643
3644 } else {
3645 const TargetRegisterClass *RC;
3646
3647 if (RegVT == MVT::f32)
3648 RC = &ARM::SPRRegClass;
3649 else if (RegVT == MVT::f64)
3650 RC = &ARM::DPRRegClass;
3651 else if (RegVT == MVT::v2f64)
3652 RC = &ARM::QPRRegClass;
3653 else if (RegVT == MVT::i32)
3654 RC = AFI->isThumb1OnlyFunction() ? &ARM::tGPRRegClass
3655 : &ARM::GPRRegClass;
3656 else
3657 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering");
3658
3659 // Transform the arguments in physical registers into virtual ones.
3660 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
3661 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3662 }
3663
3664 // If this is an 8 or 16-bit value, it is really passed promoted
3665 // to 32 bits. Insert an assert[sz]ext to capture this, then
3666 // truncate to the right size.
3667 switch (VA.getLocInfo()) {
3668 default: llvm_unreachable("Unknown loc info!");
3669 case CCValAssign::Full: break;
3670 case CCValAssign::BCvt:
3671 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
3672 break;
3673 case CCValAssign::SExt:
3674 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3675 DAG.getValueType(VA.getValVT()));
3676 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3677 break;
3678 case CCValAssign::ZExt:
3679 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3680 DAG.getValueType(VA.getValVT()));
3681 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3682 break;
3683 }
3684
3685 InVals.push_back(ArgValue);
3686
3687 } else { // VA.isRegLoc()
3688 // sanity check
3689 assert(VA.isMemLoc());
3690 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
3691
3692 int index = VA.getValNo();
3693
3694 // Some Ins[] entries become multiple ArgLoc[] entries.
3695 // Process them only once.
3696 if (index != lastInsIndex)
3697 {
3698 ISD::ArgFlagsTy Flags = Ins[index].Flags;
3699 // FIXME: For now, all byval parameter objects are marked mutable.
3700 // This can be changed with more analysis.
3701 // In case of tail call optimization, mark all arguments mutable,
3702 // since they could be overwritten by the lowering of arguments in case of
3703 // a tail call.
3704 if (Flags.isByVal()) {
3705 assert(Ins[index].isOrigArg() &&
3706        "Byval arguments cannot be implicit");
3707 unsigned CurByValIndex = CCInfo.getInRegsParamsProcessed();
3708
3709 int FrameIndex = StoreByValRegs(
3710 CCInfo, DAG, dl, Chain, &*CurOrigArg, CurByValIndex,
3711 VA.getLocMemOffset(), Flags.getByValSize());
3712 InVals.push_back(DAG.getFrameIndex(FrameIndex, PtrVT));
3713 CCInfo.nextInRegsParam();
3714 } else {
3715 unsigned FIOffset = VA.getLocMemOffset();
3716 int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
3717 FIOffset, true);
3718
3719 // Create load nodes to retrieve arguments from the stack.
3720 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3721 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
3722 MachinePointerInfo::getFixedStack(
3723 DAG.getMachineFunction(), FI)));
3724 }
3725 lastInsIndex = index;
3726 }
3727 }
3728 }
3729
3730 // varargs
3731 if (isVarArg && MFI.hasVAStart())
3732 VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
3733 CCInfo.getNextStackOffset(),
3734 TotalArgRegsSaveSize);
3735
3736 AFI->setArgumentStackSize(CCInfo.getNextStackOffset());
3737
3738 return Chain;
3739}
3740
3741/// isFloatingPointZero - Return true if this is +0.0.
3742static bool isFloatingPointZero(SDValue Op) {
3743 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
3744 return CFP->getValueAPF().isPosZero();
3745 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
3746 // Maybe this has already been legalized into the constant pool?
3747 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
3748 SDValue WrapperOp = Op.getOperand(1).getOperand(0);
3749 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
3750 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
3751 return CFP->getValueAPF().isPosZero();
3752 }
3753 } else if (Op->getOpcode() == ISD::BITCAST &&
3754 Op->getValueType(0) == MVT::f64) {
3755 // Handle (ISD::BITCAST (ARMISD::VMOVIMM (ISD::TargetConstant 0)) MVT::f64)
3756 // created by LowerConstantFP().
3757 SDValue BitcastOp = Op->getOperand(0);
3758 if (BitcastOp->getOpcode() == ARMISD::VMOVIMM &&
3759 isNullConstant(BitcastOp->getOperand(0)))
3760 return true;
3761 }
3762 return false;
3763}
3764
3765/// Returns an appropriate ARM CMP (cmp) and corresponding condition code for
3766/// the given operands.
3767SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
3768 SDValue &ARMcc, SelectionDAG &DAG,
3769 const SDLoc &dl) const {
3770 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
3771 unsigned C = RHSC->getZExtValue();
3772 if (!isLegalICmpImmediate(C)) {
3773 // Constant does not fit, try adjusting it by one?
3774 switch (CC) {
3775 default: break;
3776 case ISD::SETLT:
3777 case ISD::SETGE:
3778 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) {
3779 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
3780 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3781 }
3782 break;
3783 case ISD::SETULT:
3784 case ISD::SETUGE:
3785 if (C != 0 && isLegalICmpImmediate(C-1)) {
3786 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
3787 RHS = DAG.getConstant(C - 1, dl, MVT::i32);
3788 }
3789 break;
3790 case ISD::SETLE:
3791 case ISD::SETGT:
3792 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) {
3793 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
3794 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3795 }
3796 break;
3797 case ISD::SETULE:
3798 case ISD::SETUGT:
3799 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) {
3800 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
3801 RHS = DAG.getConstant(C + 1, dl, MVT::i32);
3802 }
3803 break;
3804 }
3805 }
3806 }
3807
3808 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
3809 ARMISD::NodeType CompareType;
3810 switch (CondCode) {
3811 default:
3812 CompareType = ARMISD::CMP;
3813 break;
3814 case ARMCC::EQ:
3815 case ARMCC::NE:
3816 // Uses only Z Flag
3817 CompareType = ARMISD::CMPZ;
3818 break;
3819 }
3820 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
3821 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS);
3822}
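A worked instance of the immediate adjustment above (illustrative only; 0x1001 is an assumed constant that does not encode as an ARM modified immediate, while 0x1000 does):

//   (setlt  x, 0x1001)  -->  (setle  x, 0x1000)   ; x < C   <=>  x <= C-1
//   (setuge x, 0x1001)  -->  (setugt x, 0x1000)   ; x >= C  <=>  x > C-1 (unsigned)
// Either way the compare result is unchanged, but the new immediate is encodable.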
3823
3824/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
3825SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS,
3826 SelectionDAG &DAG, const SDLoc &dl,
3827 bool InvalidOnQNaN) const {
3828  assert(!Subtarget->isFPOnlySP() || RHS.getValueType() != MVT::f64);
3829 SDValue Cmp;
3830 SDValue C = DAG.getConstant(InvalidOnQNaN, dl, MVT::i32);
3831 if (!isFloatingPointZero(RHS))
3832 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS, C);
3833 else
3834 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS, C);
3835 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp);
3836}
3837
3838/// duplicateCmp - Glue values can have only one use, so this function
3839/// duplicates a comparison node.
3840SDValue
3841ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const {
3842 unsigned Opc = Cmp.getOpcode();
3843 SDLoc DL(Cmp);
3844 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ)
3845 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1));
3846
3847  assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation");
3848 Cmp = Cmp.getOperand(0);
3849 Opc = Cmp.getOpcode();
3850 if (Opc == ARMISD::CMPFP)
3851 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3852 Cmp.getOperand(1), Cmp.getOperand(2));
3853 else {
3854    assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT");
3855 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),
3856 Cmp.getOperand(1));
3857 }
3858 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp);
3859}
3860
3861std::pair<SDValue, SDValue>
3862ARMTargetLowering::getARMXALUOOp(SDValue Op, SelectionDAG &DAG,
3863 SDValue &ARMcc) const {
3864  assert(Op.getValueType() == MVT::i32 && "Unsupported value type");
3865
3866 SDValue Value, OverflowCmp;
3867 SDValue LHS = Op.getOperand(0);
3868 SDValue RHS = Op.getOperand(1);
3869 SDLoc dl(Op);
3870
3871 // FIXME: We are currently always generating CMPs because we don't support
3872 // generating CMN through the backend. This is not as good as the natural
3873 // CMP case because it causes a register dependency and cannot be folded
3874 // later.
3875
3876 switch (Op.getOpcode()) {
3877 default:
3878    llvm_unreachable("Unknown overflow instruction!");
3879 case ISD::SADDO:
3880 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3881 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3882 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3883 break;
3884 case ISD::UADDO:
3885 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3886 Value = DAG.getNode(ISD::ADD, dl, Op.getValueType(), LHS, RHS);
3887 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, Value, LHS);
3888 break;
3889 case ISD::SSUBO:
3890 ARMcc = DAG.getConstant(ARMCC::VC, dl, MVT::i32);
3891 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3892 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3893 break;
3894 case ISD::USUBO:
3895 ARMcc = DAG.getConstant(ARMCC::HS, dl, MVT::i32);
3896 Value = DAG.getNode(ISD::SUB, dl, Op.getValueType(), LHS, RHS);
3897 OverflowCmp = DAG.getNode(ARMISD::CMP, dl, MVT::Glue, LHS, RHS);
3898 break;
3899 } // switch (...)
3900
3901 return std::make_pair(Value, OverflowCmp);
3902}
3903
3904SDValue
3905ARMTargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) const {
3906 // Let legalize expand this if it isn't a legal type yet.
3907 if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
3908 return SDValue();
3909
3910 SDValue Value, OverflowCmp;
3911 SDValue ARMcc;
3912 std::tie(Value, OverflowCmp) = getARMXALUOOp(Op, DAG, ARMcc);
3913 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3914 SDLoc dl(Op);
3915 // We use 0 and 1 as false and true values.
3916 SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
3917 SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
3918 EVT VT = Op.getValueType();
3919
3920 SDValue Overflow = DAG.getNode(ARMISD::CMOV, dl, VT, TVal, FVal,
3921 ARMcc, CCR, OverflowCmp);
3922
3923 SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
3924 return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
3925}
3926
3927SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
3928 SDValue Cond = Op.getOperand(0);
3929 SDValue SelectTrue = Op.getOperand(1);
3930 SDValue SelectFalse = Op.getOperand(2);
3931 SDLoc dl(Op);
3932 unsigned Opc = Cond.getOpcode();
3933
3934 if (Cond.getResNo() == 1 &&
3935 (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO ||
3936 Opc == ISD::USUBO)) {
3937 if (!DAG.getTargetLoweringInfo().isTypeLegal(Cond->getValueType(0)))
3938 return SDValue();
3939
3940 SDValue Value, OverflowCmp;
3941 SDValue ARMcc;
3942 std::tie(Value, OverflowCmp) = getARMXALUOOp(Cond, DAG, ARMcc);
3943 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3944 EVT VT = Op.getValueType();
3945
3946 return getCMOV(dl, VT, SelectTrue, SelectFalse, ARMcc, CCR,
3947 OverflowCmp, DAG);
3948 }
3949
3950 // Convert:
3951 //
3952 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
3953 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
3954 //
3955 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
3956 const ConstantSDNode *CMOVTrue =
3957 dyn_cast<ConstantSDNode>(Cond.getOperand(0));
3958 const ConstantSDNode *CMOVFalse =
3959 dyn_cast<ConstantSDNode>(Cond.getOperand(1));
3960
3961 if (CMOVTrue && CMOVFalse) {
3962 unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
3963 unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
3964
3965 SDValue True;
3966 SDValue False;
3967 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
3968 True = SelectTrue;
3969 False = SelectFalse;
3970 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
3971 True = SelectFalse;
3972 False = SelectTrue;
3973 }
3974
3975 if (True.getNode() && False.getNode()) {
3976 EVT VT = Op.getValueType();
3977 SDValue ARMcc = Cond.getOperand(2);
3978 SDValue CCR = Cond.getOperand(3);
3979 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG);
3980        assert(True.getValueType() == VT);
3981 return getCMOV(dl, VT, True, False, ARMcc, CCR, Cmp, DAG);
3982 }
3983 }
3984 }
3985
3986 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
3987 // undefined bits before doing a full-word comparison with zero.
3988 Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
3989 DAG.getConstant(1, dl, Cond.getValueType()));
3990
3991 return DAG.getSelectCC(dl, Cond,
3992 DAG.getConstant(0, dl, Cond.getValueType()),
3993 SelectTrue, SelectFalse, ISD::SETNE);
3994}
3995
3996static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
3997 bool &swpCmpOps, bool &swpVselOps) {
3998 // Start by selecting the GE condition code for opcodes that return true for
3999 // 'equality'
4000 if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE ||
4001 CC == ISD::SETULE)
4002 CondCode = ARMCC::GE;
4003
4004 // and GT for opcodes that return false for 'equality'.
4005 else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT ||
4006 CC == ISD::SETULT)
4007 CondCode = ARMCC::GT;
4008
4009 // Since we are constrained to GE/GT, if the opcode contains 'less', we need
4010 // to swap the compare operands.
4011 if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT ||
4012 CC == ISD::SETULT)
4013 swpCmpOps = true;
4014
4015 // Both GT and GE are ordered comparisons, and return false for 'unordered'.
4016 // If we have an unordered opcode, we need to swap the operands to the VSEL
4017 // instruction (effectively negating the condition).
4018 //
4019 // This also has the effect of swapping which one of 'less' or 'greater'
4020 // returns true, so we also swap the compare operands. It also switches
4021 // whether we return true for 'equality', so we compensate by picking the
4022 // opposite condition code to our original choice.
4023 if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE ||
4024 CC == ISD::SETUGT) {
4025 swpCmpOps = !swpCmpOps;
4026 swpVselOps = !swpVselOps;
4027 CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT;
4028 }
4029
4030 // 'ordered' is 'anything but unordered', so use the VS condition code and
4031 // swap the VSEL operands.
4032 if (CC == ISD::SETO) {
4033 CondCode = ARMCC::VS;
4034 swpVselOps = true;
4035 }
4036
4037 // 'unordered or not equal' is 'anything but equal', so use the EQ condition
4038 // code and swap the VSEL operands.
4039 if (CC == ISD::SETUNE) {
4040 CondCode = ARMCC::EQ;
4041 swpVselOps = true;
4042 }
4043}
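Tracing the helper above for ISD::SETULT shows how the three adjustments compose (a sketch; the intermediate states follow directly from the branches above):

//   SETULT: returns false for 'equality'  -> CondCode = GT
//           opcode contains 'less'        -> swpCmpOps = true
//           opcode is unordered           -> swpCmpOps = false, swpVselOps = true,
//                                            CondCode flips from GT to GE
//   so (select (setult a, b), t, f) becomes: compare a, b and VSEL f/t on GE.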
4044
4045SDValue ARMTargetLowering::getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal,
4046 SDValue TrueVal, SDValue ARMcc, SDValue CCR,
4047 SDValue Cmp, SelectionDAG &DAG) const {
4048 if (Subtarget->isFPOnlySP() && VT == MVT::f64) {
4049 FalseVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4050 DAG.getVTList(MVT::i32, MVT::i32), FalseVal);
4051 TrueVal = DAG.getNode(ARMISD::VMOVRRD, dl,
4052 DAG.getVTList(MVT::i32, MVT::i32), TrueVal);
4053
4054 SDValue TrueLow = TrueVal.getValue(0);
4055 SDValue TrueHigh = TrueVal.getValue(1);
4056 SDValue FalseLow = FalseVal.getValue(0);
4057 SDValue FalseHigh = FalseVal.getValue(1);
4058
4059 SDValue Low = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseLow, TrueLow,
4060 ARMcc, CCR, Cmp);
4061 SDValue High = DAG.getNode(ARMISD::CMOV, dl, MVT::i32, FalseHigh, TrueHigh,
4062 ARMcc, CCR, duplicateCmp(Cmp, DAG));
4063
4064 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Low, High);
4065 } else {
4066 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,
4067 Cmp);
4068 }
4069}
4070
4071static bool isGTorGE(ISD::CondCode CC) {
4072 return CC == ISD::SETGT || CC == ISD::SETGE;
4073}
4074
4075static bool isLTorLE(ISD::CondCode CC) {
4076 return CC == ISD::SETLT || CC == ISD::SETLE;
4077}
4078
4079// See if a conditional (LHS CC RHS ? TrueVal : FalseVal) is lower-saturating.
4080// All of these conditions (and their <= and >= counterparts) will do:
4081// x < k ? k : x
4082// x > k ? x : k
4083// k < x ? x : k
4084// k > x ? k : x
4085static bool isLowerSaturate(const SDValue LHS, const SDValue RHS,
4086 const SDValue TrueVal, const SDValue FalseVal,
4087 const ISD::CondCode CC, const SDValue K) {
4088 return (isGTorGE(CC) &&
4089 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal))) ||
4090 (isLTorLE(CC) &&
4091 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal)));
4092}
4093
4094// Similar to isLowerSaturate(), but checks for upper-saturating conditions.
4095static bool isUpperSaturate(const SDValue LHS, const SDValue RHS,
4096 const SDValue TrueVal, const SDValue FalseVal,
4097 const ISD::CondCode CC, const SDValue K) {
4098 return (isGTorGE(CC) &&
4099 ((K == RHS && K == TrueVal) || (K == LHS && K == FalseVal))) ||
4100 (isLTorLE(CC) &&
4101 ((K == LHS && K == TrueVal) || (K == RHS && K == FalseVal)));
4102}
4103
4104// Check if two chained conditionals could be converted into SSAT.
4105//
4106// SSAT can replace a set of two conditional selectors that bound a number to an
4107// interval of type [k, ~k] when k + 1 is a power of 2. Here are some examples:
4108//
4109// x < -k ? -k : (x > k ? k : x)
4110// x < -k ? -k : (x < k ? x : k)
4111// x > -k ? (x > k ? k : x) : -k
4112// x < k ? (x < -k ? -k : x) : k
4113// etc.
4114//
4115// It returns true if the conversion can be done, false otherwise.
4116// Additionally, the variable is returned in parameter V and the constant in K.
4117static bool isSaturatingConditional(const SDValue &Op, SDValue &V,
4118 uint64_t &K) {
4119 SDValue LHS1 = Op.getOperand(0);
4120 SDValue RHS1 = Op.getOperand(1);
4121 SDValue TrueVal1 = Op.getOperand(2);
4122 SDValue FalseVal1 = Op.getOperand(3);
4123 ISD::CondCode CC1 = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4124
4125 const SDValue Op2 = isa<ConstantSDNode>(TrueVal1) ? FalseVal1 : TrueVal1;
4126 if (Op2.getOpcode() != ISD::SELECT_CC)
4127 return false;
4128
4129 SDValue LHS2 = Op2.getOperand(0);
4130 SDValue RHS2 = Op2.getOperand(1);
4131 SDValue TrueVal2 = Op2.getOperand(2);
4132 SDValue FalseVal2 = Op2.getOperand(3);
4133 ISD::CondCode CC2 = cast<CondCodeSDNode>(Op2.getOperand(4))->get();
4134
4135 // Find out which are the constants and which are the variables
4136 // in each conditional
4137 SDValue *K1 = isa<ConstantSDNode>(LHS1) ? &LHS1 : isa<ConstantSDNode>(RHS1)
4138 ? &RHS1
4139 : nullptr;
4140 SDValue *K2 = isa<ConstantSDNode>(LHS2) ? &LHS2 : isa<ConstantSDNode>(RHS2)
4141 ? &RHS2
4142 : nullptr;
4143 SDValue K2Tmp = isa<ConstantSDNode>(TrueVal2) ? TrueVal2 : FalseVal2;
4144 SDValue V1Tmp = (K1 && *K1 == LHS1) ? RHS1 : LHS1;
4145 SDValue V2Tmp = (K2 && *K2 == LHS2) ? RHS2 : LHS2;
4146 SDValue V2 = (K2Tmp == TrueVal2) ? FalseVal2 : TrueVal2;
4147
4148 // We must detect cases where the original operations worked with 16- or
4149 // 8-bit values. In such case, V2Tmp != V2 because the comparison operations
4150 // must work with sign-extended values but the select operations return
4151 // the original non-extended value.
4152 SDValue V2TmpReg = V2Tmp;
4153 if (V2Tmp->getOpcode() == ISD::SIGN_EXTEND_INREG)
4154 V2TmpReg = V2Tmp->getOperand(0);
4155
4156 // Check that the registers and the constants have the correct values
4157 // in both conditionals
4158 if (!K1 || !K2 || *K1 == Op2 || *K2 != K2Tmp || V1Tmp != V2Tmp ||
4159 V2TmpReg != V2)
4160 return false;
4161
4162 // Figure out which conditional is saturating the lower/upper bound.
4163 const SDValue *LowerCheckOp =
4164 isLowerSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4165 ? &Op
4166 : isLowerSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4167 ? &Op2
4168 : nullptr;
4169 const SDValue *UpperCheckOp =
4170 isUpperSaturate(LHS1, RHS1, TrueVal1, FalseVal1, CC1, *K1)
4171 ? &Op
4172 : isUpperSaturate(LHS2, RHS2, TrueVal2, FalseVal2, CC2, *K2)
4173 ? &Op2
4174 : nullptr;
4175
4176 if (!UpperCheckOp || !LowerCheckOp || LowerCheckOp == UpperCheckOp)
4177 return false;
4178
4179 // Check that the constant in the lower-bound check is
4180 // the opposite of the constant in the upper-bound check
4181 // in 1's complement.
4182 int64_t Val1 = cast<ConstantSDNode>(*K1)->getSExtValue();
4183 int64_t Val2 = cast<ConstantSDNode>(*K2)->getSExtValue();
4184 int64_t PosVal = std::max(Val1, Val2);
4185
4186 if (((Val1 > Val2 && UpperCheckOp == &Op) ||
4187 (Val1 < Val2 && UpperCheckOp == &Op2)) &&
4188 Val1 == ~Val2 && isPowerOf2_64(PosVal + 1)) {
4189
4190 V = V2;
4191 K = (uint64_t)PosVal; // At this point, PosVal is guaranteed to be positive
4192 return true;
4193 }
4194
4195 return false;
4196}
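For example, with k = 127 (so k + 1 = 128 is a power of two), one of the accepted shapes is the chained select

//   x < -128 ? -128 : (x > 127 ? 127 : x)
//
// which clamps x into [-128, 127]; note 127 == ~(-128), satisfying the
// Val1 == ~Val2 check, and the caller receives V = x and K = 127.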
4197
4198SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
4199 EVT VT = Op.getValueType();
4200 SDLoc dl(Op);
4201
4202 // Try to convert two saturating conditional selects into a single SSAT
4203 SDValue SatValue;
4204 uint64_t SatConstant;
4205 if (((!Subtarget->isThumb() && Subtarget->hasV6Ops()) || Subtarget->isThumb2()) &&
4206 isSaturatingConditional(Op, SatValue, SatConstant))
4207 return DAG.getNode(ARMISD::SSAT, dl, VT, SatValue,
4208 DAG.getConstant(countTrailingOnes(SatConstant), dl, VT));
4209
4210 SDValue LHS = Op.getOperand(0);
4211 SDValue RHS = Op.getOperand(1);
4212 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
4213 SDValue TrueVal = Op.getOperand(2);
4214 SDValue FalseVal = Op.getOperand(3);
4215
4216 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4217 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4218 dl);
4219
4220 // If softenSetCCOperands only returned one value, we should compare it to
4221 // zero.
4222 if (!RHS.getNode()) {
4223 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4224 CC = ISD::SETNE;
4225 }
4226 }
4227
4228 if (LHS.getValueType() == MVT::i32) {
4229 // Try to generate VSEL on ARMv8.
4230 // The VSEL instruction can't use all the usual ARM condition
4231 // codes: it only has two bits to select the condition code, so it's
4232 // constrained to use only GE, GT, VS and EQ.
4233 //
4234 // To implement all the various ISD::SETXXX opcodes, we sometimes need to
4235 // swap the operands of the previous compare instruction (effectively
4236 // inverting the compare condition, swapping 'less' and 'greater') and
4237 // sometimes need to swap the operands to the VSEL (which inverts the
4238 // condition in the sense of firing whenever the previous condition didn't)
4239 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4240 TrueVal.getValueType() == MVT::f64)) {
4241 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4242 if (CondCode == ARMCC::LT || CondCode == ARMCC::LE ||
4243 CondCode == ARMCC::VC || CondCode == ARMCC::NE) {
4244 CC = ISD::getSetCCInverse(CC, true);
4245 std::swap(TrueVal, FalseVal);
4246 }
4247 }
4248
4249 SDValue ARMcc;
4250 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4251 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4252 return getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4253 }
4254
4255 ARMCC::CondCodes CondCode, CondCode2;
4256 bool InvalidOnQNaN;
4257 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4258
4259 // Try to generate VMAXNM/VMINNM on ARMv8.
4260 if (Subtarget->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 ||
4261 TrueVal.getValueType() == MVT::f64)) {
4262 bool swpCmpOps = false;
4263 bool swpVselOps = false;
4264 checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps);
4265
4266 if (CondCode == ARMCC::GT || CondCode == ARMCC::GE ||
4267 CondCode == ARMCC::VS || CondCode == ARMCC::EQ) {
4268 if (swpCmpOps)
4269 std::swap(LHS, RHS);
4270 if (swpVselOps)
4271 std::swap(TrueVal, FalseVal);
4272 }
4273 }
4274
4275 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4276 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4277 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4278 SDValue Result = getCMOV(dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp, DAG);
4279 if (CondCode2 != ARMCC::AL) {
4280 SDValue ARMcc2 = DAG.getConstant(CondCode2, dl, MVT::i32);
4281 // FIXME: Needs another CMP because flag can have but one use.
4282 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4283 Result = getCMOV(dl, VT, Result, TrueVal, ARMcc2, CCR, Cmp2, DAG);
4284 }
4285 return Result;
4286}
4287
4288/// canChangeToInt - Given the fp compare operand, return true if it is suitable
4289/// to morph to an integer compare sequence.
4290static bool canChangeToInt(SDValue Op, bool &SeenZero,
4291 const ARMSubtarget *Subtarget) {
4292 SDNode *N = Op.getNode();
4293 if (!N->hasOneUse())
4294 // Otherwise it requires moving the value from fp to integer registers.
4295 return false;
4296 if (!N->getNumValues())
4297 return false;
4298 EVT VT = Op.getValueType();
4299 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
4300 // f32 case is generally profitable. f64 case only makes sense when vcmpe +
4301 // vmrs are very slow, e.g. cortex-a8.
4302 return false;
4303
4304 if (isFloatingPointZero(Op)) {
4305 SeenZero = true;
4306 return true;
4307 }
4308 return ISD::isNormalLoad(N);
4309}
4310
4311static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
4312 if (isFloatingPointZero(Op))
4313 return DAG.getConstant(0, SDLoc(Op), MVT::i32);
4314
4315 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
4316 return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(),
4317 Ld->getPointerInfo(), Ld->getAlignment(),
4318 Ld->getMemOperand()->getFlags());
4319
4320  llvm_unreachable("Unknown VFP cmp argument!");
4321}
4322
4323static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
4324 SDValue &RetVal1, SDValue &RetVal2) {
4325 SDLoc dl(Op);
4326
4327 if (isFloatingPointZero(Op)) {
4328 RetVal1 = DAG.getConstant(0, dl, MVT::i32);
4329 RetVal2 = DAG.getConstant(0, dl, MVT::i32);
4330 return;
4331 }
4332
4333 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
4334 SDValue Ptr = Ld->getBasePtr();
4335 RetVal1 =
4336 DAG.getLoad(MVT::i32, dl, Ld->getChain(), Ptr, Ld->getPointerInfo(),
4337 Ld->getAlignment(), Ld->getMemOperand()->getFlags());
4338
4339 EVT PtrType = Ptr.getValueType();
4340 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
4341 SDValue NewPtr = DAG.getNode(ISD::ADD, dl,
4342 PtrType, Ptr, DAG.getConstant(4, dl, PtrType));
4343 RetVal2 = DAG.getLoad(MVT::i32, dl, Ld->getChain(), NewPtr,
4344 Ld->getPointerInfo().getWithOffset(4), NewAlign,
4345 Ld->getMemOperand()->getFlags());
4346 return;
4347 }
4348
4349  llvm_unreachable("Unknown VFP cmp argument!");
4350}
4351
4352/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
4353/// f32 and even f64 comparisons to integer ones.
4354SDValue
4355ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
4356 SDValue Chain = Op.getOperand(0);
4357 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4358 SDValue LHS = Op.getOperand(2);
4359 SDValue RHS = Op.getOperand(3);
4360 SDValue Dest = Op.getOperand(4);
4361 SDLoc dl(Op);
4362
4363 bool LHSSeenZero = false;
4364 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget);
4365 bool RHSSeenZero = false;
4366 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget);
4367 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) {
4368 // If unsafe fp math optimization is enabled and there are no other uses of
4369 // the CMP operands, and the condition code is EQ or NE, we can optimize it
4370 // to an integer comparison.
4371 if (CC == ISD::SETOEQ)
4372 CC = ISD::SETEQ;
4373 else if (CC == ISD::SETUNE)
4374 CC = ISD::SETNE;
4375
4376 SDValue Mask = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4377 SDValue ARMcc;
4378 if (LHS.getValueType() == MVT::f32) {
4379 LHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4380 bitcastf32Toi32(LHS, DAG), Mask);
4381 RHS = DAG.getNode(ISD::AND, dl, MVT::i32,
4382 bitcastf32Toi32(RHS, DAG), Mask);
4383 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4384 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4385 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4386 Chain, Dest, ARMcc, CCR, Cmp);
4387 }
4388
4389 SDValue LHS1, LHS2;
4390 SDValue RHS1, RHS2;
4391 expandf64Toi32(LHS, DAG, LHS1, LHS2);
4392 expandf64Toi32(RHS, DAG, RHS1, RHS2);
4393 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask);
4394 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask);
4395 ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
4396 ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4397 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4398 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
4399 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops);
4400 }
4401
4402 return SDValue();
4403}
4404
4405SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
4406 SDValue Chain = Op.getOperand(0);
4407 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
4408 SDValue LHS = Op.getOperand(2);
4409 SDValue RHS = Op.getOperand(3);
4410 SDValue Dest = Op.getOperand(4);
4411 SDLoc dl(Op);
4412
4413 if (Subtarget->isFPOnlySP() && LHS.getValueType() == MVT::f64) {
4414 DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
4415 dl);
4416
4417 // If softenSetCCOperands only returned one value, we should compare it to
4418 // zero.
4419 if (!RHS.getNode()) {
4420 RHS = DAG.getConstant(0, dl, LHS.getValueType());
4421 CC = ISD::SETNE;
4422 }
4423 }
4424
4425 if (LHS.getValueType() == MVT::i32) {
4426 SDValue ARMcc;
4427 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
4428 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4429 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
4430 Chain, Dest, ARMcc, CCR, Cmp);
4431 }
4432
4433  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
4434
4435 if (getTargetMachine().Options.UnsafeFPMath &&
4436 (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
4437 CC == ISD::SETNE || CC == ISD::SETUNE)) {
4438 if (SDValue Result = OptimizeVFPBrcond(Op, DAG))
4439 return Result;
4440 }
4441
4442 ARMCC::CondCodes CondCode, CondCode2;
4443 bool InvalidOnQNaN;
4444 FPCCToARMCC(CC, CondCode, CondCode2, InvalidOnQNaN);
4445
4446 SDValue ARMcc = DAG.getConstant(CondCode, dl, MVT::i32);
4447 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl, InvalidOnQNaN);
4448 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4449 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue);
4450 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
4451 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4452 if (CondCode2 != ARMCC::AL) {
4453 ARMcc = DAG.getConstant(CondCode2, dl, MVT::i32);
4454 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
4455 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops);
4456 }
4457 return Res;
4458}
4459
4460SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
4461 SDValue Chain = Op.getOperand(0);
4462 SDValue Table = Op.getOperand(1);
4463 SDValue Index = Op.getOperand(2);
4464 SDLoc dl(Op);
4465
4466 EVT PTy = getPointerTy(DAG.getDataLayout());
4467 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
4468 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy);
4469 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
4470 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
4471 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
4472 if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
4473    // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
4474 // which does another jump to the destination. This also makes it easier
4475 // to translate it to TBB / TBH later (Thumb2 only).
4476 // FIXME: This might not work if the function is extremely large.
4477 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
4478 Addr, Op.getOperand(2), JTI);
4479 }
4480 if (isPositionIndependent() || Subtarget->isROPI()) {
4481 Addr =
4482 DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
4483 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4484 Chain = Addr.getValue(1);
4485 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table);
4486 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4487 } else {
4488 Addr =
4489 DAG.getLoad(PTy, dl, Chain, Addr,
4490 MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
4491 Chain = Addr.getValue(1);
4492 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI);
4493 }
4494}
4495
4496static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) {
4497 EVT VT = Op.getValueType();
4498 SDLoc dl(Op);
4499
4500 if (Op.getValueType().getVectorElementType() == MVT::i32) {
4501 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32)
4502 return Op;
4503 return DAG.UnrollVectorOp(Op.getNode());
4504 }
4505
4506  assert(Op.getOperand(0).getValueType() == MVT::v4f32 &&
4507         "Invalid type for custom lowering!");
4508 if (VT != MVT::v4i16)
4509 return DAG.UnrollVectorOp(Op.getNode());
4510
4511 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0));
4512 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op);
4513}
4514
4515SDValue ARMTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
4516 EVT VT = Op.getValueType();
4517 if (VT.isVector())
4518 return LowerVectorFP_TO_INT(Op, DAG);
4519 if (Subtarget->isFPOnlySP() && Op.getOperand(0).getValueType() == MVT::f64) {
4520 RTLIB::Libcall LC;
4521 if (Op.getOpcode() == ISD::FP_TO_SINT)
4522 LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
4523 Op.getValueType());
4524 else
4525 LC = RTLIB::getFPTOUINT(Op.getOperand(0).getValueType(),
4526 Op.getValueType());
4527 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4528 /*isSigned*/ false, SDLoc(Op)).first;
4529 }
4530
4531 return Op;
4532}
4533
4534static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
4535 EVT VT = Op.getValueType();
4536 SDLoc dl(Op);
4537
4538 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) {
4539 if (VT.getVectorElementType() == MVT::f32)
4540 return Op;
4541 return DAG.UnrollVectorOp(Op.getNode());
4542 }
4543
4544  assert(Op.getOperand(0).getValueType() == MVT::v4i16 &&
4545         "Invalid type for custom lowering!");
4546 if (VT != MVT::v4f32)
4547 return DAG.UnrollVectorOp(Op.getNode());
4548
4549 unsigned CastOpc;
4550 unsigned Opc;
4551 switch (Op.getOpcode()) {
4552  default: llvm_unreachable("Invalid opcode!");
4553 case ISD::SINT_TO_FP:
4554 CastOpc = ISD::SIGN_EXTEND;
4555 Opc = ISD::SINT_TO_FP;
4556 break;
4557 case ISD::UINT_TO_FP:
4558 CastOpc = ISD::ZERO_EXTEND;
4559 Opc = ISD::UINT_TO_FP;
4560 break;
4561 }
4562
4563 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0));
4564 return DAG.getNode(Opc, dl, VT, Op);
4565}
4566
4567SDValue ARMTargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const {
4568 EVT VT = Op.getValueType();
4569 if (VT.isVector())
4570 return LowerVectorINT_TO_FP(Op, DAG);
4571 if (Subtarget->isFPOnlySP() && Op.getValueType() == MVT::f64) {
4572 RTLIB::Libcall LC;
4573 if (Op.getOpcode() == ISD::SINT_TO_FP)
4574 LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
4575 Op.getValueType());
4576 else
4577 LC = RTLIB::getUINTTOFP(Op.getOperand(0).getValueType(),
4578 Op.getValueType());
4579 return makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(0),
4580 /*isSigned*/ false, SDLoc(Op)).first;
4581 }
4582
4583 return Op;
4584}
4585
4586SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
4587 // Implement fcopysign with a fabs and a conditional fneg.
4588 SDValue Tmp0 = Op.getOperand(0);
4589 SDValue Tmp1 = Op.getOperand(1);
4590 SDLoc dl(Op);
4591 EVT VT = Op.getValueType();
4592 EVT SrcVT = Tmp1.getValueType();
4593 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST ||
4594 Tmp0.getOpcode() == ARMISD::VMOVDRR;
4595 bool UseNEON = !InGPR && Subtarget->hasNEON();
4596
4597 if (UseNEON) {
4598 // Use VBSL to copy the sign bit.
4599 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80);
4600 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32,
4601 DAG.getTargetConstant(EncodedVal, dl, MVT::i32));
4602 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64;
4603 if (VT == MVT::f64)
4604 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4605 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask),
4606 DAG.getConstant(32, dl, MVT::i32));
4607 else /*if (VT == MVT::f32)*/
4608 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0);
4609 if (SrcVT == MVT::f32) {
4610 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1);
4611 if (VT == MVT::f64)
4612 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT,
4613 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1),
4614 DAG.getConstant(32, dl, MVT::i32));
4615 } else if (VT == MVT::f32)
4616 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64,
4617 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1),
4618 DAG.getConstant(32, dl, MVT::i32));
4619 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0);
4620 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1);
4621
4622 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff),
4623 dl, MVT::i32);
4624 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes);
4625 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask,
4626 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes));
4627
4628 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT,
4629 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask),
4630 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot));
4631 if (VT == MVT::f32) {
4632 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res);
4633 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
4634 DAG.getConstant(0, dl, MVT::i32));
4635 } else {
4636 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res);
4637 }
4638
4639 return Res;
4640 }
4641
4642 // Bitcast operand 1 to i32.
4643 if (SrcVT == MVT::f64)
4644 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4645 Tmp1).getValue(1);
4646 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1);
4647
4648 // Or in the signbit with integer operations.
4649 SDValue Mask1 = DAG.getConstant(0x80000000, dl, MVT::i32);
4650 SDValue Mask2 = DAG.getConstant(0x7fffffff, dl, MVT::i32);
4651 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1);
4652 if (VT == MVT::f32) {
4653 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32,
4654 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2);
4655 return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4656 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1));
4657 }
4658
4659 // f64: Or the high part with signbit and then combine two parts.
4660 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
4661 Tmp0);
4662 SDValue Lo = Tmp0.getValue(0);
4663 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
4664 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
4665 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
4666}
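For f32 operands the integer path above amounts to the usual sign-bit splice (a minimal sketch in terms of the bit patterns):

//   copysign(x, y) = bitcast_f32( (bitcast_i32(x) & 0x7fffffff)
//                               | (bitcast_i32(y) & 0x80000000) )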
4667
4668SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
4669 MachineFunction &MF = DAG.getMachineFunction();
4670 MachineFrameInfo &MFI = MF.getFrameInfo();
4671 MFI.setReturnAddressIsTaken(true);
4672
4673 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
4674 return SDValue();
4675
4676 EVT VT = Op.getValueType();
4677 SDLoc dl(Op);
4678 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4679 if (Depth) {
4680 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
4681 SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
4682 return DAG.getLoad(VT, dl, DAG.getEntryNode(),
4683 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
4684 MachinePointerInfo());
4685 }
4686
4687 // Return LR, which contains the return address. Mark it an implicit live-in.
4688 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
4689 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
4690}
4691
4692SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
4693 const ARMBaseRegisterInfo &ARI =
4694 *static_cast<const ARMBaseRegisterInfo*>(RegInfo);
4695 MachineFunction &MF = DAG.getMachineFunction();
4696 MachineFrameInfo &MFI = MF.getFrameInfo();
4697 MFI.setFrameAddressIsTaken(true);
4698
4699 EVT VT = Op.getValueType();
4700 SDLoc dl(Op); // FIXME probably not meaningful
4701 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
4702 unsigned FrameReg = ARI.getFrameRegister(MF);
4703 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
4704 while (Depth--)
4705 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
4706 MachinePointerInfo());
4707 return FrameAddr;
4708}
4709
4710// FIXME? Maybe this could be a TableGen attribute on some registers and
4711// this table could be generated automatically from RegInfo.
4712unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
4713 SelectionDAG &DAG) const {
4714 unsigned Reg = StringSwitch<unsigned>(RegName)
4715 .Case("sp", ARM::SP)
4716 .Default(0);
4717 if (Reg)
4718 return Reg;
4719 report_fatal_error(Twine("Invalid register name \""
4720 + StringRef(RegName) + "\"."));
4721}
4722
4723// The result is a 64-bit value, so split it into two 32-bit values and return
4724// them as a pair of values.
4725static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl<SDValue> &Results,
4726 SelectionDAG &DAG) {
4727 SDLoc DL(N);
4728
4729 // This function is only supposed to be called for i64 type destination.
4730  assert(N->getValueType(0) == MVT::i64
4731         && "ExpandREAD_REGISTER called for non-i64 type result.");
4732
4733 SDValue Read = DAG.getNode(ISD::READ_REGISTER, DL,
4734 DAG.getVTList(MVT::i32, MVT::i32, MVT::Other),
4735 N->getOperand(0),
4736 N->getOperand(1));
4737
4738 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Read.getValue(0),
4739 Read.getValue(1)));
4740 Results.push_back(Read.getOperand(0));
4741}
4742
4743/// \p BC is a bitcast that is about to be turned into a VMOVDRR.
4744/// When \p DstVT, the destination type of \p BC, is on the vector
4745/// register bank and the source of bitcast, \p Op, operates on the same bank,
4746/// it might be possible to combine them, such that everything stays on the
4747/// vector register bank.
4748/// \return The node that would replace \p BC, if the combine
4749/// is possible.
4750static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC,
4751 SelectionDAG &DAG) {
4752 SDValue Op = BC->getOperand(0);
4753 EVT DstVT = BC->getValueType(0);
4754
4755 // The only vector instruction that can produce a scalar (remember,
4756 // since the bitcast was about to be turned into VMOVDRR, the source
4757 // type is i64) from a vector is EXTRACT_VECTOR_ELT.
4758 // Moreover, we can do this combine only if there is one use.
4759 // Finally, if the destination type is not a vector, there is not
4760 // much point on forcing everything on the vector bank.
4761 if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
4762 !Op.hasOneUse())
4763 return SDValue();
4764
4765 // If the index is not constant, we will introduce an additional
4766 // multiply that will stick.
4767 // Give up in that case.
4768 ConstantSDNode *Index = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4769 if (!Index)
4770 return SDValue();
4771 unsigned DstNumElt = DstVT.getVectorNumElements();
4772
4773 // Compute the new index.
4774 const APInt &APIntIndex = Index->getAPIntValue();
4775 APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt);
4776 NewIndex *= APIntIndex;
4777 // Check if the new constant index fits into i32.
4778 if (NewIndex.getBitWidth() > 32)
4779 return SDValue();
4780
4781 // vMTy bitcast(i64 extractelt vNi64 src, i32 index) ->
4782 // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M)
4783 SDLoc dl(Op);
4784 SDValue ExtractSrc = Op.getOperand(0);
4785 EVT VecVT = EVT::getVectorVT(
4786 *DAG.getContext(), DstVT.getScalarType(),
4787 ExtractSrc.getValueType().getVectorNumElements() * DstNumElt);
4788 SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc);
4789 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast,
4790 DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32));
4791}
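A concrete instance of the rewrite sketched in the comment above (value names are illustrative):

//   v2f32 (bitcast (i64 extractelt (v2i64 %src), 1))
//     --> v2f32 (extract_subvector (v4f32 (bitcast %src)), 2)
//   DstNumElt = 2 and the old index is 1, so the new constant index is 1 * 2 = 2.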
4792
4793/// ExpandBITCAST - If the target supports VFP, this function is called to
4794/// expand a bit convert where either the source or destination type is i64 to
4795/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
4796/// operand type is illegal (e.g., v2f32 for a target that doesn't support
4797/// vectors), since the legalizer won't know what to do with that.
4798static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
4799 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4800 SDLoc dl(N);
4801 SDValue Op = N->getOperand(0);
4802
4803 // This function is only supposed to be called for i64 types, either as the
4804 // source or destination of the bit convert.
4805 EVT SrcVT = Op.getValueType();
4806 EVT DstVT = N->getValueType(0);
4807  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
4808         "ExpandBITCAST called for non-i64 type");
4809
4810 // Turn i64->f64 into VMOVDRR.
4811 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
4812 // Do not force values to GPRs (this is what VMOVDRR does for the inputs)
4813 // if we can combine the bitcast with its source.
4814 if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG))
4815 return Val;
4816
4817 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4818 DAG.getConstant(0, dl, MVT::i32));
4819 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
4820 DAG.getConstant(1, dl, MVT::i32));
4821 return DAG.getNode(ISD::BITCAST, dl, DstVT,
4822 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
4823 }
4824
4825 // Turn f64->i64 into VMOVRRD.
4826 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
4827 SDValue Cvt;
4828 if (DAG.getDataLayout().isBigEndian() && SrcVT.isVector() &&
4829 SrcVT.getVectorNumElements() > 1)
4830 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4831 DAG.getVTList(MVT::i32, MVT::i32),
4832 DAG.getNode(ARMISD::VREV64, dl, SrcVT, Op));
4833 else
4834 Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
4835 DAG.getVTList(MVT::i32, MVT::i32), Op);
4836 // Merge the pieces into a single i64 value.
4837 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
4838 }
4839
4840 return SDValue();
4841}
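In the plain scalar case this reduces to moving the two 32-bit halves between register banks (a sketch; names are illustrative):

//   f64 (bitcast i64 %x)  -->  VMOVDRR (extract_element %x, 0), (extract_element %x, 1)
//   i64 (bitcast f64 %d)  -->  build_pair (VMOVRRD %d)   ; two i32 results merged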
4842
4843/// getZeroVector - Returns a vector of specified type with all zero elements.
4844/// Zero vectors are used to represent vector negation and in those cases
4845/// will be implemented with the NEON VNEG instruction. However, VNEG does
4846/// not support i64 elements, so sometimes the zero vectors will need to be
4847/// explicitly constructed. Regardless, use a canonical VMOV to create the
4848/// zero vector.
4849static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) {
4850  assert(VT.isVector() && "Expected a vector type");
4851 // The canonical modified immediate encoding of a zero vector is....0!
4852 SDValue EncodedVal = DAG.getTargetConstant(0, dl, MVT::i32);
4853 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
4854 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
4855 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
4856}
4857
4858/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
4859/// i32 values and take a 2 x i32 value to shift plus a shift amount.
4860SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
4861 SelectionDAG &DAG) const {
4862  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4863 EVT VT = Op.getValueType();
4864 unsigned VTBits = VT.getSizeInBits();
4865 SDLoc dl(Op);
4866 SDValue ShOpLo = Op.getOperand(0);
4867 SDValue ShOpHi = Op.getOperand(1);
4868 SDValue ShAmt = Op.getOperand(2);
4869 SDValue ARMcc;
4870 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4871 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
4872
4873  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
4874
4875 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4876 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4877 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
4878 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4879 DAG.getConstant(VTBits, dl, MVT::i32));
4880 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
4881 SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4882 SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
4883 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4884 ISD::SETGE, ARMcc, DAG, dl);
4885 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
4886 ARMcc, CCR, CmpLo);
4887
4888
4889 SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
4890 SDValue HiBigShift = Opc == ISD::SRA
4891 ? DAG.getNode(Opc, dl, VT, ShOpHi,
4892 DAG.getConstant(VTBits - 1, dl, VT))
4893 : DAG.getConstant(0, dl, VT);
4894 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4895 ISD::SETGE, ARMcc, DAG, dl);
4896 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4897 ARMcc, CCR, CmpHi);
4898
4899 SDValue Ops[2] = { Lo, Hi };
4900 return DAG.getMergeValues(Ops, dl);
4901}
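To see what the CMOVs select between, consider a 64-bit SRL by a variable amount (a sketch of the two regimes):

//   amt <  32 (ExtraShAmt negative): Lo = (lo >> amt) | (hi << (32 - amt)),  Hi = hi >> amt
//   amt >= 32 (ExtraShAmt >= 0):     Lo = hi >> (amt - 32),                  Hi = 0
// For SRA the Hi "big shift" value is hi >> 31 instead of 0, preserving the sign.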
4902
4903/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
4904/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
4905SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
4906 SelectionDAG &DAG) const {
4907  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
4908 EVT VT = Op.getValueType();
4909 unsigned VTBits = VT.getSizeInBits();
4910 SDLoc dl(Op);
4911 SDValue ShOpLo = Op.getOperand(0);
4912 SDValue ShOpHi = Op.getOperand(1);
4913 SDValue ShAmt = Op.getOperand(2);
4914 SDValue ARMcc;
4915 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
4916
4917  assert(Op.getOpcode() == ISD::SHL_PARTS);
4918 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
4919 DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
4920 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
4921 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
4922 SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
4923
4924 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
4925 DAG.getConstant(VTBits, dl, MVT::i32));
4926 SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
4927 SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4928 ISD::SETGE, ARMcc, DAG, dl);
4929 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
4930 ARMcc, CCR, CmpHi);
4931
4932 SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
4933 ISD::SETGE, ARMcc, DAG, dl);
4934 SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
4935 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
4936 DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
4937
4938 SDValue Ops[2] = { Lo, Hi };
4939 return DAG.getMergeValues(Ops, dl);
4940}
4941
4942SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
4943 SelectionDAG &DAG) const {
4944 // The rounding mode is in bits 23:22 of the FPSCR.
4945 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
4946  // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3)
4947 // so that the shift + and get folded into a bitfield extract.
4948 SDLoc dl(Op);
4949 SDValue Ops[] = { DAG.getEntryNode(),
4950 DAG.getConstant(Intrinsic::arm_get_fpscr, dl, MVT::i32) };
4951
4952 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_W_CHAIN, dl, MVT::i32, Ops);
4953 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
4954 DAG.getConstant(1U << 22, dl, MVT::i32));
4955 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
4956 DAG.getConstant(22, dl, MVT::i32));
4957 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
4958 DAG.getConstant(3, dl, MVT::i32));
4959}
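Checking the formula on the two ends of the mapping (a worked example, not additional lowering code):

//   FPSCR[23:22] = 0 (nearest):      ((0 + 1) & 3) = 1  ->  FLT_ROUNDS 1
//   FPSCR[23:22] = 3 (toward zero):  ((3 + 1) & 3) = 0  ->  FLT_ROUNDS 0
// Adding 1 << 22 before the shift is what realizes the 0->1, 1->2, 2->3, 3->0 map.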
4960
4961static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
4962 const ARMSubtarget *ST) {
4963 SDLoc dl(N);
4964 EVT VT = N->getValueType(0);
4965 if (VT.isVector()) {
4966    assert(ST->hasNEON());
4967
4968 // Compute the least significant set bit: LSB = X & -X
4969 SDValue X = N->getOperand(0);
4970 SDValue NX = DAG.getNode(ISD::SUB, dl, VT, getZeroVector(VT, DAG, dl), X);
4971 SDValue LSB = DAG.getNode(ISD::AND, dl, VT, X, NX);
4972
4973 EVT ElemTy = VT.getVectorElementType();
4974
4975 if (ElemTy == MVT::i8) {
4976 // Compute with: cttz(x) = ctpop(lsb - 1)
4977 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4978 DAG.getTargetConstant(1, dl, ElemTy));
4979 SDValue Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
4980 return DAG.getNode(ISD::CTPOP, dl, VT, Bits);
4981 }
4982
4983 if ((ElemTy == MVT::i16 || ElemTy == MVT::i32) &&
4984 (N->getOpcode() == ISD::CTTZ_ZERO_UNDEF)) {
4985 // Compute with: cttz(x) = (width - 1) - ctlz(lsb), if x != 0
4986 unsigned NumBits = ElemTy.getSizeInBits();
4987 SDValue WidthMinus1 =
4988 DAG.getNode(ARMISD::VMOVIMM, dl, VT,
4989 DAG.getTargetConstant(NumBits - 1, dl, ElemTy));
4990 SDValue CTLZ = DAG.getNode(ISD::CTLZ, dl, VT, LSB);
4991 return DAG.getNode(ISD::SUB, dl, VT, WidthMinus1, CTLZ);
4992 }
4993
4994 // Compute with: cttz(x) = ctpop(lsb - 1)
4995
4996 // Since we can only compute the number of bits in a byte with vcnt.8, we
4997 // have to gather the result with pairwise addition (vpaddl) for i16, i32,
4998 // and i64.
4999
5000 // Compute LSB - 1.
5001 SDValue Bits;
5002 if (ElemTy == MVT::i64) {
5003 // Load constant 0xffff'ffff'ffff'ffff to register.
5004 SDValue FF = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5005 DAG.getTargetConstant(0x1eff, dl, MVT::i32));
5006 Bits = DAG.getNode(ISD::ADD, dl, VT, LSB, FF);
5007 } else {
5008 SDValue One = DAG.getNode(ARMISD::VMOVIMM, dl, VT,
5009 DAG.getTargetConstant(1, dl, ElemTy));
5010 Bits = DAG.getNode(ISD::SUB, dl, VT, LSB, One);
5011 }
5012
5013 // Count #bits with vcnt.8.
5014 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5015 SDValue BitsVT8 = DAG.getNode(ISD::BITCAST, dl, VT8Bit, Bits);
5016 SDValue Cnt8 = DAG.getNode(ISD::CTPOP, dl, VT8Bit, BitsVT8);
5017
5018 // Gather the #bits with vpaddl (pairwise add.)
5019 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5020 SDValue Cnt16 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT16Bit,
5021 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5022 Cnt8);
5023 if (ElemTy == MVT::i16)
5024 return Cnt16;
5025
5026 EVT VT32Bit = VT.is64BitVector() ? MVT::v2i32 : MVT::v4i32;
5027 SDValue Cnt32 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT32Bit,
5028 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5029 Cnt16);
5030 if (ElemTy == MVT::i32)
5031 return Cnt32;
5032
5033 assert(ElemTy == MVT::i64);
5034 SDValue Cnt64 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5035 DAG.getTargetConstant(Intrinsic::arm_neon_vpaddlu, dl, MVT::i32),
5036 Cnt32);
5037 return Cnt64;
5038 }
5039
5040 if (!ST->hasV6T2Ops())
5041 return SDValue();
5042
5043 SDValue rbit = DAG.getNode(ISD::BITREVERSE, dl, VT, N->getOperand(0));
5044 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
5045}
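For reference, the two identities the lowering above relies on, written as plain scalar C++ (illustrative helpers, not LLVM APIs):

#include <cstdint>

// cttz(x) = ctpop((x & -x) - 1): the bits strictly below the lowest set bit
// are exactly the trailing zeros (and the identity also yields 32 for x == 0).
static unsigned cttzViaPopcount(uint32_t X) {
  uint32_t LSB = X & (0u - X);
  return (unsigned)__builtin_popcount(LSB - 1);
}

// Scalar i32 path on v6T2+: cttz(x) = ctlz(bitreverse(x)). Undefined for
// X == 0, just like CTTZ_ZERO_UNDEF.
static unsigned cttzViaBitReverse(uint32_t X) {
  uint32_t Rev = 0;
  for (int I = 0; I < 32; ++I)
    Rev |= ((X >> I) & 1u) << (31 - I);
  return (unsigned)__builtin_clz(Rev);
}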
5046
5047/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
5048/// for each 16-bit element from operand, repeated. The basic idea is to
5049/// leverage vcnt to get the 8-bit counts, gather and add the results.
5050///
5051/// Trace for v4i16:
5052/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5053/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
5054/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
5055/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
5056/// [b0 b1 b2 b3 b4 b5 b6 b7]
5057/// +[b1 b0 b3 b2 b5 b4 b7 b6]
5058/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
5059/// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits)
5060static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
5061 EVT VT = N->getValueType(0);
5062 SDLoc DL(N);
5063
5064 EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
5065 SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
5066 SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
5067 SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
5068 SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
5069 return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
5070}
5071
5072/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
5073/// bit-count for each 16-bit element from the operand. We need slightly
5074/// different sequencing for v4i16 and v8i16 to stay within NEON's available
5075/// 64/128-bit registers.
5076///
5077/// Trace for v4i16:
5078/// input = [v0 v1 v2 v3 ] (vi 16-bit element)
5079/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi)
5080/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ]
5081/// v4i16:Extracted = [k0 k1 k2 k3 ]
5082static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) {
5083 EVT VT = N->getValueType(0);
5084 SDLoc DL(N);
5085
5086 SDValue BitCounts = getCTPOP16BitCounts(N, DAG);
5087 if (VT.is64BitVector()) {
5088 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts);
5089 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended,
5090 DAG.getIntPtrConstant(0, DL));
5091 } else {
5092 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8,
5093 BitCounts, DAG.getIntPtrConstant(0, DL));
5094 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted);
5095 }
5096}
5097
5098/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the
5099/// bit-count for each 32-bit element from the operand. The idea here is
5100/// to split the vector into 16-bit elements, leverage the 16-bit count
5101/// routine, and then combine the results.
5102///
5103/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged):
5104/// input = [v0 v1 ] (vi: 32-bit elements)
5105/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1])
5106/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi)
5107/// vrev: N0 = [k1 k0 k3 k2 ]
5108/// [k0 k1 k2 k3 ]
5109/// N1 =+[k1 k0 k3 k2 ]
5110/// [k0 k2 k1 k3 ]
5111/// N2 =+[k1 k3 k0 k2 ]
5112/// [k0 k2 k1 k3 ]
5113/// Extended =+[k1 k3 k0 k2 ]
5114/// [k0 k2 ]
5115/// Extracted=+[k1 k3 ]
5116///
5117static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) {
5118 EVT VT = N->getValueType(0);
5119 SDLoc DL(N);
5120
5121 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16;
5122
5123 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0));
5124 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG);
5125 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16);
5126 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0);
5127 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1);
5128
5129 if (VT.is64BitVector()) {
5130 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2);
5131 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended,
5132 DAG.getIntPtrConstant(0, DL));
5133 } else {
5134 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2,
5135 DAG.getIntPtrConstant(0, DL));
5136 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted);
5137 }
5138}
5139
5140static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG,
5141 const ARMSubtarget *ST) {
5142 EVT VT = N->getValueType(0);
5143
5144 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON.");
5145 assert((VT == MVT::v2i32 || VT == MVT::v4i32 ||
5146 VT == MVT::v4i16 || VT == MVT::v8i16) &&
5147 "Unexpected type for custom ctpop lowering");
5148
5149 if (VT.getVectorElementType() == MVT::i32)
5150 return lowerCTPOP32BitElements(N, DAG);
5151 else
5152 return lowerCTPOP16BitElements(N, DAG);
5153}
5154
5155static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
5156 const ARMSubtarget *ST) {
5157 EVT VT = N->getValueType(0);
5158 SDLoc dl(N);
5159
5160 if (!VT.isVector())
5161 return SDValue();
5162
5163 // Lower vector shifts on NEON to use VSHL.
5164 assert(ST->hasNEON() && "unexpected vector shift");
5165
5166 // Left shifts translate directly to the vshiftu intrinsic.
5167 if (N->getOpcode() == ISD::SHL)
5168 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5169 DAG.getConstant(Intrinsic::arm_neon_vshiftu, dl,
5170 MVT::i32),
5171 N->getOperand(0), N->getOperand(1));
5172
5173 assert((N->getOpcode() == ISD::SRA ||
5174 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
5175
5176 // NEON uses the same intrinsics for both left and right shifts. For
5177 // right shifts, the shift amounts are negative, so negate the vector of
5178 // shift amounts.
5179 EVT ShiftVT = N->getOperand(1).getValueType();
5180 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
5181 getZeroVector(ShiftVT, DAG, dl),
5182 N->getOperand(1));
5183 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
5184 Intrinsic::arm_neon_vshifts :
5185 Intrinsic::arm_neon_vshiftu);
5186 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
5187 DAG.getConstant(vshiftInt, dl, MVT::i32),
5188 N->getOperand(0), NegatedCount);
5189}
5190
5191static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
5192 const ARMSubtarget *ST) {
5193 EVT VT = N->getValueType(0);
5194 SDLoc dl(N);
5195
5196 // We can get here for a node like i32 = ISD::SHL i32, i64
5197 if (VT != MVT::i64)
5198 return SDValue();
5199
5200 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
5201 "Unknown shift to lower!");
5202
5203 // We only lower SRA, SRL of 1 here, all others use generic lowering.
5204 if (!isOneConstant(N->getOperand(1)))
5205 return SDValue();
5206
5207 // If we are in thumb mode, we don't have RRX.
5208 if (ST->isThumb1Only()) return SDValue();
5209
5210 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
5211 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5212 DAG.getConstant(0, dl, MVT::i32));
5213 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
5214 DAG.getConstant(1, dl, MVT::i32));
5215
5216 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
5217 // captures the result into a carry flag.
5218 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
5219 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), Hi);
5220
5221 // The low part is an ARMISD::RRX operand, which shifts the carry in.
5222 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));
5223
5224 // Merge the pieces into a single i64 value.
5225 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
5226}
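A scalar model of the SRL-by-1 case handled above: SRL_FLAG shifts the high word right by one and captures the shifted-out bit as the carry, and RRX rotates that carry into the top of the low word (illustrative sketch only, not LLVM code):

#include <cstdint>

static uint64_t lshr64By1(uint32_t Lo, uint32_t Hi) {
  uint32_t Carry = Hi & 1;                    // carry-out of the high word (SRL_FLAG)
  uint32_t NewHi = Hi >> 1;
  uint32_t NewLo = (Lo >> 1) | (Carry << 31); // RRX shifts the carry into bit 31
  return ((uint64_t)NewHi << 32) | NewLo;     // BUILD_PAIR of the two halves
}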
5227
5228static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
5229 SDValue TmpOp0, TmpOp1;
5230 bool Invert = false;
5231 bool Swap = false;
5232 unsigned Opc = 0;
5233
5234 SDValue Op0 = Op.getOperand(0);
5235 SDValue Op1 = Op.getOperand(1);
5236 SDValue CC = Op.getOperand(2);
5237 EVT CmpVT = Op0.getValueType().changeVectorElementTypeToInteger();
5238 EVT VT = Op.getValueType();
5239 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
5240 SDLoc dl(Op);
5241
5242 if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
5243 (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
5244 // Special-case integer 64-bit equality comparisons. They aren't legal,
5245 // but they can be lowered with a few vector instructions.
5246 unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
5247 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
5248 SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
5249 SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
5250 SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
5251 DAG.getCondCode(ISD::SETEQ));
5252 SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
5253 SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
5254 Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
5255 if (SetCCOpcode == ISD::SETNE)
5256 Merged = DAG.getNOT(dl, Merged, CmpVT);
5257 Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
5258 return Merged;
5259 }
5260
5261 if (CmpVT.getVectorElementType() == MVT::i64)
5262 // 64-bit comparisons are not legal in general.
5263 return SDValue();
5264
5265 if (Op1.getValueType().isFloatingPoint()) {
5266 switch (SetCCOpcode) {
5267 default: llvm_unreachable("Illegal FP comparison")::llvm::llvm_unreachable_internal("Illegal FP comparison", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp"
, 5267)
;
5268 case ISD::SETUNE:
5269 case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
5270 case ISD::SETOEQ:
5271 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5272 case ISD::SETOLT:
5273 case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
5274 case ISD::SETOGT:
5275 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5276 case ISD::SETOLE:
5277 case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
5278 case ISD::SETOGE:
5279 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5280 case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
5281 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
5282 case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
5283 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
5284 case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
5285 case ISD::SETONE:
5286 // Expand this to (OLT | OGT).
5287 TmpOp0 = Op0;
5288 TmpOp1 = Op1;
5289 Opc = ISD::OR;
5290 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5291 Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
5292 break;
5293 case ISD::SETUO:
5294 Invert = true;
5295 LLVM_FALLTHROUGH;
5296 case ISD::SETO:
5297 // Expand this to (OLT | OGE).
5298 TmpOp0 = Op0;
5299 TmpOp1 = Op1;
5300 Opc = ISD::OR;
5301 Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
5302 Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1);
5303 break;
5304 }
5305 } else {
5306 // Integer comparisons.
5307 switch (SetCCOpcode) {
5308 default: llvm_unreachable("Illegal integer comparison")::llvm::llvm_unreachable_internal("Illegal integer comparison"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp"
, 5308)
;
5309 case ISD::SETNE: Invert = true;
5310 case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
5311 case ISD::SETLT: Swap = true;
5312 case ISD::SETGT: Opc = ARMISD::VCGT; break;
5313 case ISD::SETLE: Swap = true;
5314 case ISD::SETGE: Opc = ARMISD::VCGE; break;
5315 case ISD::SETULT: Swap = true;
5316 case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
5317 case ISD::SETULE: Swap = true;
5318 case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
5319 }
5320
5321 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
5322 if (Opc == ARMISD::VCEQ) {
5323
5324 SDValue AndOp;
5325 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5326 AndOp = Op0;
5327 else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
5328 AndOp = Op1;
5329
5330 // Ignore bitconvert.
5331 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
5332 AndOp = AndOp.getOperand(0);
5333
5334 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
5335 Opc = ARMISD::VTST;
5336 Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
5337 Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
5338 Invert = !Invert;
5339 }
5340 }
5341 }
5342
5343 if (Swap)
5344 std::swap(Op0, Op1);
5345
5346 // If one of the operands is a constant vector zero, attempt to fold the
5347 // comparison to a specialized compare-against-zero form.
5348 SDValue SingleOp;
5349 if (ISD::isBuildVectorAllZeros(Op1.getNode()))
5350 SingleOp = Op0;
5351 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
5352 if (Opc == ARMISD::VCGE)
5353 Opc = ARMISD::VCLEZ;
5354 else if (Opc == ARMISD::VCGT)
5355 Opc = ARMISD::VCLTZ;
5356 SingleOp = Op1;
5357 }
5358
5359 SDValue Result;
5360 if (SingleOp.getNode()) {
5361 switch (Opc) {
5362 case ARMISD::VCEQ:
5363 Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
5364 case ARMISD::VCGE:
5365 Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
5366 case ARMISD::VCLEZ:
5367 Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
5368 case ARMISD::VCGT:
5369 Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
5370 case ARMISD::VCLTZ:
5371 Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
5372 default:
5373 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5374 }
5375 } else {
5376 Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
5377 }
5378
5379 Result = DAG.getSExtOrTrunc(Result, dl, VT);
5380
5381 if (Invert)
5382 Result = DAG.getNOT(dl, Result, VT);
5383
5384 return Result;
5385}
5386
5387static SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) {
5388 SDValue LHS = Op.getOperand(0);
5389 SDValue RHS = Op.getOperand(1);
5390 SDValue Carry = Op.getOperand(2);
5391 SDValue Cond = Op.getOperand(3);
5392 SDLoc DL(Op);
5393
5394 assert(LHS.getSimpleValueType().isInteger() && "SETCCE is integer only.");
5395
5396 assert(Carry.getOpcode() != ISD::CARRY_FALSE);
5397 SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
5398 SDValue Cmp = DAG.getNode(ARMISD::SUBE, DL, VTs, LHS, RHS, Carry);
5399
5400 SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
5401 SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
5402 SDValue ARMcc = DAG.getConstant(
5403 IntCCToARMCC(cast<CondCodeSDNode>(Cond)->get()), DL, MVT::i32);
5404 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
5405 SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, ARM::CPSR,
5406 Cmp.getValue(1), SDValue());
5407 return DAG.getNode(ARMISD::CMOV, DL, Op.getValueType(), FVal, TVal, ARMcc,
5408 CCR, Chain.getValue(1));
5409}
5410
5411/// isNEONModifiedImm - Check if the specified splat value corresponds to a
5412/// valid vector constant for a NEON instruction with a "modified immediate"
5413/// operand (e.g., VMOV). If so, return the encoded value.
5414static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
5415 unsigned SplatBitSize, SelectionDAG &DAG,
5416 const SDLoc &dl, EVT &VT, bool is128Bits,
5417 NEONModImmType type) {
5418 unsigned OpCmode, Imm;
5419
5420 // SplatBitSize is set to the smallest size that splats the vector, so a
5421 // zero vector will always have SplatBitSize == 8. However, NEON modified
5422 // immediate instructions other than VMOV do not support the 8-bit encoding
5423 // of a zero vector, and the default encoding of zero is supposed to be the
5424 // 32-bit version.
5425 if (SplatBits == 0)
5426 SplatBitSize = 32;
5427
5428 switch (SplatBitSize) {
5429 case 8:
5430 if (type != VMOVModImm)
5431 return SDValue();
5432 // Any 1-byte value is OK. Op=0, Cmode=1110.
5433 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
5434 OpCmode = 0xe;
5435 Imm = SplatBits;
5436 VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
5437 break;
5438
5439 case 16:
5440 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
5441 VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
5442 if ((SplatBits & ~0xff) == 0) {
5443 // Value = 0x00nn: Op=x, Cmode=100x.
5444 OpCmode = 0x8;
5445 Imm = SplatBits;
5446 break;
5447 }
5448 if ((SplatBits & ~0xff00) == 0) {
5449 // Value = 0xnn00: Op=x, Cmode=101x.
5450 OpCmode = 0xa;
5451 Imm = SplatBits >> 8;
5452 break;
5453 }
5454 return SDValue();
5455
5456 case 32:
5457 // NEON's 32-bit VMOV supports splat values where:
5458 // * only one byte is nonzero, or
5459 // * the least significant byte is 0xff and the second byte is nonzero, or
5460 // * the least significant 2 bytes are 0xff and the third is nonzero.
5461 VT = is128Bits ? MVT::v4i32 : MVT::v2i32;
5462 if ((SplatBits & ~0xff) == 0) {
5463 // Value = 0x000000nn: Op=x, Cmode=000x.
5464 OpCmode = 0;
5465 Imm = SplatBits;
5466 break;
5467 }
5468 if ((SplatBits & ~0xff00) == 0) {
5469 // Value = 0x0000nn00: Op=x, Cmode=001x.
5470 OpCmode = 0x2;
5471 Imm = SplatBits >> 8;
5472 break;
5473 }
5474 if ((SplatBits & ~0xff0000) == 0) {
5475 // Value = 0x00nn0000: Op=x, Cmode=010x.
5476 OpCmode = 0x4;
5477 Imm = SplatBits >> 16;
5478 break;
5479 }
5480 if ((SplatBits & ~0xff000000) == 0) {
5481 // Value = 0xnn000000: Op=x, Cmode=011x.
5482 OpCmode = 0x6;
5483 Imm = SplatBits >> 24;
5484 break;
5485 }
5486
5487 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC
5488 if (type == OtherModImm) return SDValue();
5489
5490 if ((SplatBits & ~0xffff) == 0 &&
5491 ((SplatBits | SplatUndef) & 0xff) == 0xff) {
5492 // Value = 0x0000nnff: Op=x, Cmode=1100.
5493 OpCmode = 0xc;
5494 Imm = SplatBits >> 8;
5495 break;
5496 }
5497
5498 if ((SplatBits & ~0xffffff) == 0 &&
5499 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) {
5500 // Value = 0x00nnffff: Op=x, Cmode=1101.
5501 OpCmode = 0xd;
5502 Imm = SplatBits >> 16;
5503 break;
5504 }
5505
5506 // Note: there are a few 32-bit splat values (specifically: 00ffff00,
5507 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not
5508 // VMOV.I32. A (very) minor optimization would be to replicate the value
5509 // and fall through here to test for a valid 64-bit splat. But, then the
5510 // caller would also need to check and handle the change in size.
5511 return SDValue();
5512
5513 case 64: {
5514 if (type != VMOVModImm)
5515 return SDValue();
5516 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff.
5517 uint64_t BitMask = 0xff;
5518 uint64_t Val = 0;
5519 unsigned ImmMask = 1;
5520 Imm = 0;
5521 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) {
5522 if (((SplatBits | SplatUndef) & BitMask) == BitMask) {
5523 Val |= BitMask;
5524 Imm |= ImmMask;
5525 } else if ((SplatBits & BitMask) != 0) {
5526 return SDValue();
5527 }
5528 BitMask <<= 8;
5529 ImmMask <<= 1;
5530 }
5531
5532 if (DAG.getDataLayout().isBigEndian())
5533 // swap higher and lower 32 bit word
5534 Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4);
5535
5536 // Op=1, Cmode=1110.
5537 OpCmode = 0x1e;
5538 VT = is128Bits ? MVT::v2i64 : MVT::v1i64;
5539 break;
5540 }
5541
5542 default:
5543 llvm_unreachable("unexpected size for isNEONModifiedImm")::llvm::llvm_unreachable_internal("unexpected size for isNEONModifiedImm"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp"
, 5543)
;
5544 }
5545
5546 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm);
5547 return DAG.getTargetConstant(EncodedVal, dl, MVT::i32);
5548}
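The 32-bit VMOV cases above reduce to "exactly one byte is nonzero", with the 0x..ff and 0x..ffff forms handled separately as cmode 1100/1101. A simplified standalone predicate for the single-byte family (hypothetical helper for illustration, not the LLVM encoding routine):

#include <cstdint>

static bool isSingleNonZeroByte32(uint32_t SplatBits) {
  for (unsigned Shift = 0; Shift < 32; Shift += 8)
    if ((SplatBits & ~(0xffu << Shift)) == 0)
      return true;                 // matches the Cmode=000x..011x encodings
  return false;
}
// e.g. 0x00nn0000 is accepted (Cmode=010x); 0x00ffff00 is not, which is why the
// note above says such values are only representable as a 64-bit splat.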
5549
5550SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
5551 const ARMSubtarget *ST) const {
5552 bool IsDouble = Op.getValueType() == MVT::f64;
5553 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
5554 const APFloat &FPVal = CFP->getValueAPF();
5555
5556 // Prevent floating-point constants from using literal loads
5557 // when execute-only is enabled.
5558 if (ST->genExecuteOnly()) {
5559 APInt INTVal = FPVal.bitcastToAPInt();
5560 SDLoc DL(CFP);
5561 if (IsDouble) {
5562 SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
5563 SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
5564 if (!ST->isLittle())
5565 std::swap(Lo, Hi);
5566 return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
5567 } else {
5568 return DAG.getConstant(INTVal, DL, MVT::i32);
5569 }
5570 }
5571
5572 if (!ST->hasVFP3())
5573 return SDValue();
5574
5575 // Use the default (constant pool) lowering for double constants when we have
5576 // an SP-only FPU
5577 if (IsDouble && Subtarget->isFPOnlySP())
5578 return SDValue();
5579
5580 // Try splatting with a VMOV.f32...
5581 int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
5582
5583 if (ImmVal != -1) {
5584 if (IsDouble || !ST->useNEONForSinglePrecisionFP()) {
5585 // We have code in place to select a valid ConstantFP already, no need to
5586 // do any mangling.
5587 return Op;
5588 }
5589
5590 // It's a float and we are trying to use NEON operations where
5591 // possible. Lower it to a splat followed by an extract.
5592 SDLoc DL(Op);
5593 SDValue NewVal = DAG.getTargetConstant(ImmVal, DL, MVT::i32);
5594 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32,
5595 NewVal);
5596 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant,
5597 DAG.getConstant(0, DL, MVT::i32));
5598 }
5599
5600 // The rest of our options are NEON only, make sure that's allowed before
5601 // proceeding.
5602 if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP()))
5603 return SDValue();
5604
5605 EVT VMovVT;
5606 uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue();
5607
5608 // It wouldn't really be worth bothering for doubles except for one very
5609 // important value, which does happen to match: 0.0. So make sure we don't do
5610 // anything stupid.
5611 if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32))
5612 return SDValue();
5613
5614 // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too).
5615 SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op),
5616 VMovVT, false, VMOVModImm);
5617 if (NewVal != SDValue()) {
5618 SDLoc DL(Op);
5619 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT,
5620 NewVal);
5621 if (IsDouble)
5622 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5623
5624 // It's a float: cast and extract a vector element.
5625 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5626 VecConstant);
5627 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5628 DAG.getConstant(0, DL, MVT::i32));
5629 }
5630
5631 // Finally, try a VMVN.i32
5632 NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, SDLoc(Op), VMovVT,
5633 false, VMVNModImm);
5634 if (NewVal != SDValue()) {
5635 SDLoc DL(Op);
5636 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal);
5637
5638 if (IsDouble)
5639 return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant);
5640
5641 // It's a float: cast and extract a vector element.
5642 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32,
5643 VecConstant);
5644 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant,
5645 DAG.getConstant(0, DL, MVT::i32));
5646 }
5647
5648 return SDValue();
5649}
5650
5651 // Check if a VEXT instruction can handle the shuffle mask when the
5652// vector sources of the shuffle are the same.
5653static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
5654 unsigned NumElts = VT.getVectorNumElements();
5655
5656 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5657 if (M[0] < 0)
5658 return false;
5659
5660 Imm = M[0];
5661
5662 // If this is a VEXT shuffle, the immediate value is the index of the first
5663 // element. The other shuffle indices must be the successive elements after
5664 // the first one.
5665 unsigned ExpectedElt = Imm;
5666 for (unsigned i = 1; i < NumElts; ++i) {
5667 // Increment the expected index. If it wraps around, just follow it
5668 // back to index zero and keep going.
5669 ++ExpectedElt;
5670 if (ExpectedElt == NumElts)
5671 ExpectedElt = 0;
5672
5673 if (M[i] < 0) continue; // ignore UNDEF indices
5674 if (ExpectedElt != static_cast<unsigned>(M[i]))
5675 return false;
5676 }
5677
5678 return true;
5679}
5680
5681static bool isVEXTMask(ArrayRef<int> M, EVT VT,
5682 bool &ReverseVEXT, unsigned &Imm) {
5683 unsigned NumElts = VT.getVectorNumElements();
5684 ReverseVEXT = false;
5685
5686 // Assume that the first shuffle index is not UNDEF. Fail if it is.
5687 if (M[0] < 0)
5688 return false;
5689
5690 Imm = M[0];
5691
5692 // If this is a VEXT shuffle, the immediate value is the index of the first
5693 // element. The other shuffle indices must be the successive elements after
5694 // the first one.
5695 unsigned ExpectedElt = Imm;
5696 for (unsigned i = 1; i < NumElts; ++i) {
5697 // Increment the expected index. If it wraps around, it may still be
5698 // a VEXT but the source vectors must be swapped.
5699 ExpectedElt += 1;
5700 if (ExpectedElt == NumElts * 2) {
5701 ExpectedElt = 0;
5702 ReverseVEXT = true;
5703 }
5704
5705 if (M[i] < 0) continue; // ignore UNDEF indices
5706 if (ExpectedElt != static_cast<unsigned>(M[i]))
5707 return false;
5708 }
5709
5710 // Adjust the index value if the source operands will be swapped.
5711 if (ReverseVEXT)
5712 Imm -= NumElts;
5713
5714 return true;
5715}
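Worked examples of the two VEXT checks above, for v4i32 masks (NumElts == 4; the values are illustrative):

// Single source (isSingletonVEXTMask): [1, 2, 3, 0] -> Imm == 1 (wraps back to element 0).
// Two sources   (isVEXTMask):          [1, 2, 3, 4] -> Imm == 1, ReverseVEXT == false.
//                                      [6, 7, 0, 1] -> wraps past 2*NumElts at index 2, so
//                                                      ReverseVEXT == true and Imm == 6 - 4 == 2.
//                                      [2, 0, 1, 3] -> rejected: indices are not consecutive.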
5716
5717/// isVREVMask - Check if a vector shuffle corresponds to a VREV
5718/// instruction with the specified blocksize. (The order of the elements
5719/// within each block of the vector is reversed.)
5720static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
5721 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
5722 "Only possible block sizes for VREV are: 16, 32, 64");
5723
5724 unsigned EltSz = VT.getScalarSizeInBits();
5725 if (EltSz == 64)
5726 return false;
5727
5728 unsigned NumElts = VT.getVectorNumElements();
5729 unsigned BlockElts = M[0] + 1;
5730 // If the first shuffle index is UNDEF, be optimistic.
5731 if (M[0] < 0)
5732 BlockElts = BlockSize / EltSz;
5733
5734 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz)
5735 return false;
5736
5737 for (unsigned i = 0; i < NumElts; ++i) {
5738 if (M[i] < 0) continue; // ignore UNDEF indices
5739 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts))
5740 return false;
5741 }
5742
5743 return true;
5744}
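Worked examples for the VREV mask check above (illustrative only):

// v8i8,  BlockSize 32: [3,2,1,0,7,6,5,4] -> each 4-byte block reversed (VREV32.8).
// v4i16, BlockSize 64: [3,2,1,0]         -> the whole 64-bit block reversed (VREV64.16).
// v8i8,  BlockSize 32: [3,2,1,0,4,5,6,7] -> rejected: the second block is not reversed.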
5745
5746static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
5747 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of
5748 // range, then 0 is placed into the resulting vector. So pretty much any mask
5749 // of 8 elements can work here.
5750 return VT == MVT::v8i8 && M.size() == 8;
5751}
5752
5753// Checks whether the shuffle mask represents a vector transpose (VTRN) by
5754// checking that pairs of elements in the shuffle mask represent the same index
5755// in each vector, incrementing the expected index by 2 at each step.
5756// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 2, 6]
5757// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,c,g}
5758// v2={e,f,g,h}
5759// WhichResult gives the offset for each element in the mask based on which
5760// of the two results it belongs to.
5761//
5762// The transpose can be represented either as:
5763// result1 = shufflevector v1, v2, result1_shuffle_mask
5764// result2 = shufflevector v1, v2, result2_shuffle_mask
5765// where v1/v2 and the shuffle masks have the same number of elements
5766// (here WhichResult (see below) indicates which result is being checked)
5767//
5768// or as:
5769// results = shufflevector v1, v2, shuffle_mask
5770// where both results are returned in one vector and the shuffle mask has twice
5771// as many elements as v1/v2 (here WhichResult will always be 0 if true) here we
5772// want to check the low half and high half of the shuffle mask as if it were
5773 // the other case.
5774static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5775 unsigned EltSz = VT.getScalarSizeInBits();
5776 if (EltSz == 64)
5777 return false;
5778
5779 unsigned NumElts = VT.getVectorNumElements();
5780 if (M.size() != NumElts && M.size() != NumElts*2)
5781 return false;
5782
5783 // If the mask is twice as long as the input vector then we need to check the
5784 // upper and lower parts of the mask with a matching value for WhichResult
5785 // FIXME: A mask with only even values will be rejected in case the first
5786 // element is undefined, e.g. [-1, 4, 2, 6] will be rejected, because only
5787 // M[0] is used to determine WhichResult
5788 for (unsigned i = 0; i < M.size(); i += NumElts) {
5789 if (M.size() == NumElts * 2)
5790 WhichResult = i / NumElts;
5791 else
5792 WhichResult = M[i] == 0 ? 0 : 1;
5793 for (unsigned j = 0; j < NumElts; j += 2) {
5794 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5795 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + NumElts + WhichResult))
5796 return false;
5797 }
5798 }
5799
5800 if (M.size() == NumElts*2)
5801 WhichResult = 0;
5802
5803 return true;
5804}
5805
5806/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
5807/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5808/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
5809static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5810 unsigned EltSz = VT.getScalarSizeInBits();
5811 if (EltSz == 64)
5812 return false;
5813
5814 unsigned NumElts = VT.getVectorNumElements();
5815 if (M.size() != NumElts && M.size() != NumElts*2)
5816 return false;
5817
5818 for (unsigned i = 0; i < M.size(); i += NumElts) {
5819 if (M.size() == NumElts * 2)
5820 WhichResult = i / NumElts;
5821 else
5822 WhichResult = M[i] == 0 ? 0 : 1;
5823 for (unsigned j = 0; j < NumElts; j += 2) {
5824 if ((M[i+j] >= 0 && (unsigned) M[i+j] != j + WhichResult) ||
5825 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != j + WhichResult))
5826 return false;
5827 }
5828 }
5829
5830 if (M.size() == NumElts*2)
5831 WhichResult = 0;
5832
5833 return true;
5834}
5835
5836// Checks whether the shuffle mask represents a vector unzip (VUZP) by checking
5837// that the mask elements are either all even and in steps of size 2 or all odd
5838// and in steps of size 2.
5839// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 2, 4, 6]
5840// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,c,e,g}
5841// v2={e,f,g,h}
5842// Requires similar checks to that of isVTRNMask with
5843 // respect to how the results are returned.
5844static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5845 unsigned EltSz = VT.getScalarSizeInBits();
5846 if (EltSz == 64)
5847 return false;
5848
5849 unsigned NumElts = VT.getVectorNumElements();
5850 if (M.size() != NumElts && M.size() != NumElts*2)
5851 return false;
5852
5853 for (unsigned i = 0; i < M.size(); i += NumElts) {
5854 WhichResult = M[i] == 0 ? 0 : 1;
5855 for (unsigned j = 0; j < NumElts; ++j) {
5856 if (M[i+j] >= 0 && (unsigned) M[i+j] != 2 * j + WhichResult)
5857 return false;
5858 }
5859 }
5860
5861 if (M.size() == NumElts*2)
5862 WhichResult = 0;
5863
5864 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5865 if (VT.is64BitVector() && EltSz == 32)
5866 return false;
5867
5868 return true;
5869}
5870
5871/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
5872/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5873 /// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
5874static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5875 unsigned EltSz = VT.getScalarSizeInBits();
5876 if (EltSz == 64)
5877 return false;
5878
5879 unsigned NumElts = VT.getVectorNumElements();
5880 if (M.size() != NumElts && M.size() != NumElts*2)
5881 return false;
5882
5883 unsigned Half = NumElts / 2;
5884 for (unsigned i = 0; i < M.size(); i += NumElts) {
5885 WhichResult = M[i] == 0 ? 0 : 1;
5886 for (unsigned j = 0; j < NumElts; j += Half) {
5887 unsigned Idx = WhichResult;
5888 for (unsigned k = 0; k < Half; ++k) {
5889 int MIdx = M[i + j + k];
5890 if (MIdx >= 0 && (unsigned) MIdx != Idx)
5891 return false;
5892 Idx += 2;
5893 }
5894 }
5895 }
5896
5897 if (M.size() == NumElts*2)
5898 WhichResult = 0;
5899
5900 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5901 if (VT.is64BitVector() && EltSz == 32)
5902 return false;
5903
5904 return true;
5905}
5906
5907// Checks whether the shuffle mask represents a vector zip (VZIP) by checking
5908// that pairs of elements of the shufflemask represent the same index in each
5909// vector incrementing sequentially through the vectors.
5910// e.g. For v1,v2 of type v4i32 a valid shuffle mask is: [0, 4, 1, 5]
5911// v1={a,b,c,d} => x=shufflevector v1, v2 shufflemask => x={a,e,b,f}
5912// v2={e,f,g,h}
5913 // Requires similar checks to that of isVTRNMask with respect to how the results
5914// are returned.
5915static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
5916 unsigned EltSz = VT.getScalarSizeInBits();
5917 if (EltSz == 64)
5918 return false;
5919
5920 unsigned NumElts = VT.getVectorNumElements();
5921 if (M.size() != NumElts && M.size() != NumElts*2)
5922 return false;
5923
5924 for (unsigned i = 0; i < M.size(); i += NumElts) {
5925 WhichResult = M[i] == 0 ? 0 : 1;
5926 unsigned Idx = WhichResult * NumElts / 2;
5927 for (unsigned j = 0; j < NumElts; j += 2) {
5928 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5929 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx + NumElts))
5930 return false;
5931 Idx += 1;
5932 }
5933 }
5934
5935 if (M.size() == NumElts*2)
5936 WhichResult = 0;
5937
5938 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5939 if (VT.is64BitVector() && EltSz == 32)
5940 return false;
5941
5942 return true;
5943}
5944
5945/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
5946/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
5947/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
5948static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
5949 unsigned EltSz = VT.getScalarSizeInBits();
5950 if (EltSz == 64)
5951 return false;
5952
5953 unsigned NumElts = VT.getVectorNumElements();
5954 if (M.size() != NumElts && M.size() != NumElts*2)
5955 return false;
5956
5957 for (unsigned i = 0; i < M.size(); i += NumElts) {
5958 WhichResult = M[i] == 0 ? 0 : 1;
5959 unsigned Idx = WhichResult * NumElts / 2;
5960 for (unsigned j = 0; j < NumElts; j += 2) {
5961 if ((M[i+j] >= 0 && (unsigned) M[i+j] != Idx) ||
5962 (M[i+j+1] >= 0 && (unsigned) M[i+j+1] != Idx))
5963 return false;
5964 Idx += 1;
5965 }
5966 }
5967
5968 if (M.size() == NumElts*2)
5969 WhichResult = 0;
5970
5971 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
5972 if (VT.is64BitVector() && EltSz == 32)
5973 return false;
5974
5975 return true;
5976}
5977
5978/// Check if \p ShuffleMask is a NEON two-result shuffle (VZIP, VUZP, VTRN),
5979/// and return the corresponding ARMISD opcode if it is, or 0 if it isn't.
5980static unsigned isNEONTwoResultShuffleMask(ArrayRef<int> ShuffleMask, EVT VT,
5981 unsigned &WhichResult,
5982 bool &isV_UNDEF) {
5983 isV_UNDEF = false;
5984 if (isVTRNMask(ShuffleMask, VT, WhichResult))
5985 return ARMISD::VTRN;
5986 if (isVUZPMask(ShuffleMask, VT, WhichResult))
5987 return ARMISD::VUZP;
5988 if (isVZIPMask(ShuffleMask, VT, WhichResult))
5989 return ARMISD::VZIP;
5990
5991 isV_UNDEF = true;
5992 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
5993 return ARMISD::VTRN;
5994 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5995 return ARMISD::VUZP;
5996 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
5997 return ARMISD::VZIP;
5998
5999 return 0;
6000}
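For reference, example mask pairs accepted by the checks above for v4i32, in the same trace style as the earlier comments (sources v1 = {a,b,c,d}, v2 = {e,f,g,h}; values are illustrative):

// VTRN: result1 mask [0,4,2,6] -> {a,e,c,g},  result2 mask [1,5,3,7] -> {b,f,d,h}
// VUZP: result1 mask [0,2,4,6] -> {a,c,e,g},  result2 mask [1,3,5,7] -> {b,d,f,h}
// VZIP: result1 mask [0,4,1,5] -> {a,e,b,f},  result2 mask [2,6,3,7] -> {c,g,d,h}
// WhichResult distinguishes the two masks in each pair (0 for the first, 1 for the second).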
6001
6002 /// \return true if this is a reverse operation on a vector.
6003static bool isReverseMask(ArrayRef<int> M, EVT VT) {
6004 unsigned NumElts = VT.getVectorNumElements();
6005 // Make sure the mask has the right size.
6006 if (NumElts != M.size())
6007 return false;
6008
6009 // Look for <15, ..., 3, -1, 1, 0>.
6010 for (unsigned i = 0; i != NumElts; ++i)
6011 if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
6012 return false;
6013
6014 return true;
6015}
6016
6017// If N is an integer constant that can be moved into a register in one
6018// instruction, return an SDValue of such a constant (will become a MOV
6019// instruction). Otherwise return null.
6020static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG,
6021 const ARMSubtarget *ST, const SDLoc &dl) {
6022 uint64_t Val;
6023 if (!isa<ConstantSDNode>(N))
6024 return SDValue();
6025 Val = cast<ConstantSDNode>(N)->getZExtValue();
6026
6027 if (ST->isThumb1Only()) {
6028 if (Val <= 255 || ~Val <= 255)
6029 return DAG.getConstant(Val, dl, MVT::i32);
6030 } else {
6031 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1)
6032 return DAG.getConstant(Val, dl, MVT::i32);
6033 }
6034 return SDValue();
6035}
6036
6037// If this is a case we can't handle, return null and let the default
6038// expansion code take care of it.
6039SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
6040 const ARMSubtarget *ST) const {
6041 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode());
6042 SDLoc dl(Op);
6043 EVT VT = Op.getValueType();
6044
6045 APInt SplatBits, SplatUndef;
6046 unsigned SplatBitSize;
6047 bool HasAnyUndefs;
6048 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
6049 if (SplatUndef.isAllOnesValue())
6050 return DAG.getUNDEF(VT);
6051
6052 if (SplatBitSize <= 64) {
6053 // Check if an immediate VMOV works.
6054 EVT VmovVT;
6055 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(),
6056 SplatUndef.getZExtValue(), SplatBitSize,
6057 DAG, dl, VmovVT, VT.is128BitVector(),
6058 VMOVModImm);
6059 if (Val.getNode()) {
6060 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val);
6061 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6062 }
6063
6064 // Try an immediate VMVN.
6065 uint64_t NegatedImm = (~SplatBits).getZExtValue();
6066 Val = isNEONModifiedImm(NegatedImm,
6067 SplatUndef.getZExtValue(), SplatBitSize,
6068 DAG, dl, VmovVT, VT.is128BitVector(),
6069 VMVNModImm);
6070 if (Val.getNode()) {
6071 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val);
6072 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
6073 }
6074
6075 // Use vmov.f32 to materialize other v2f32 and v4f32 splats.
6076 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) {
6077 int ImmVal = ARM_AM::getFP32Imm(SplatBits);
6078 if (ImmVal != -1) {
6079 SDValue Val = DAG.getTargetConstant(ImmVal, dl, MVT::i32);
6080 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val);
6081 }
6082 }
6083 }
6084 }
6085
6086 // Scan through the operands to see if only one value is used.
6087 //
6088 // As an optimisation, even if more than one value is used it may be more
6089 // profitable to splat with one value then change some lanes.
6090 //
6091 // Heuristically we decide to do this if the vector has a "dominant" value,
6092 // defined as splatted to more than half of the lanes.
6093 unsigned NumElts = VT.getVectorNumElements();
6094 bool isOnlyLowElement = true;
6095 bool usesOnlyOneValue = true;
6096 bool hasDominantValue = false;
6097 bool isConstant = true;
6098
6099 // Map of the number of times a particular SDValue appears in the
6100 // element list.
6101 DenseMap<SDValue, unsigned> ValueCounts;
6102 SDValue Value;
6103 for (unsigned i = 0; i < NumElts; ++i) {
6104 SDValue V = Op.getOperand(i);
6105 if (V.isUndef())
6106 continue;
6107 if (i > 0)
6108 isOnlyLowElement = false;
6109 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V))
6110 isConstant = false;
6111
6112 ValueCounts.insert(std::make_pair(V, 0));
6113 unsigned &Count = ValueCounts[V];
6114
6115 // Is this value dominant? (takes up more than half of the lanes)
6116 if (++Count > (NumElts / 2)) {
6117 hasDominantValue = true;
6118 Value = V;
6119 }
6120 }
6121 if (ValueCounts.size() != 1)
6122 usesOnlyOneValue = false;
6123 if (!Value.getNode() && !ValueCounts.empty())
6124 Value = ValueCounts.begin()->first;
6125
6126 if (ValueCounts.empty())
6127 return DAG.getUNDEF(VT);
6128
6129 // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR.
6130 // Keep going if we are hitting this case.
6131 if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
6132 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
6133
6134 unsigned EltSize = VT.getScalarSizeInBits();
6135
6136 // Use VDUP for non-constant splats. For f32 constant splats, reduce to
6137 // i32 and try again.
6138 if (hasDominantValue && EltSize <= 32) {
6139 if (!isConstant) {
6140 SDValue N;
6141
6142 // If we are VDUPing a value that comes directly from a vector, that will
6143 // cause an unnecessary move to and from a GPR, where instead we could
6144 // just use VDUPLANE. We can only do this if the lane being extracted
6145 // is at a constant index, as the VDUP from lane instructions only have
6146 // constant-index forms.
6147 ConstantSDNode *constIndex;
6148 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
6149 (constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)))) {
6150 // We need to create a new undef vector to use for the VDUPLANE if the
6151 // size of the vector from which we get the value is different than the
6152 // size of the vector that we need to create. We will insert the element
6153 // such that the register coalescer will remove unnecessary copies.
6154 if (VT != Value->getOperand(0).getValueType()) {
6155 unsigned index = constIndex->getAPIntValue().getLimitedValue() %
6156 VT.getVectorNumElements();
6157 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6158 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT),
6159 Value, DAG.getConstant(index, dl, MVT::i32)),
6160 DAG.getConstant(index, dl, MVT::i32));
6161 } else
6162 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6163 Value->getOperand(0), Value->getOperand(1));
6164 } else
6165 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value);
6166
6167 if (!usesOnlyOneValue) {
6168 // The dominant value was splatted as 'N', but we now have to insert
6169 // all differing elements.
6170 for (unsigned I = 0; I < NumElts; ++I) {
6171 if (Op.getOperand(I) == Value)
6172 continue;
6173 SmallVector<SDValue, 3> Ops;
6174 Ops.push_back(N);
6175 Ops.push_back(Op.getOperand(I));
6176 Ops.push_back(DAG.getConstant(I, dl, MVT::i32));
6177 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ops);
6178 }
6179 }
6180 return N;
6181 }
6182 if (VT.getVectorElementType().isFloatingPoint()) {
6183 SmallVector<SDValue, 8> Ops;
6184 for (unsigned i = 0; i < NumElts; ++i)
6185 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32,
6186 Op.getOperand(i)));
6187 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
6188 SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
6189 Val = LowerBUILD_VECTOR(Val, DAG, ST);
6190 if (Val.getNode())
6191 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6192 }
6193 if (usesOnlyOneValue) {
6194 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl);
6195 if (isConstant && Val.getNode())
6196 return DAG.getNode(ARMISD::VDUP, dl, VT, Val);
6197 }
6198 }
6199
6200 // If all elements are constants and the case above didn't get hit, fall back
6201 // to the default expansion, which will generate a load from the constant
6202 // pool.
6203 if (isConstant)
6204 return SDValue();
6205
6206 // Empirical tests suggest this is rarely worth it for vectors of length <= 2.
6207 if (NumElts >= 4) {
6208 SDValue shuffle = ReconstructShuffle(Op, DAG);
6209 if (shuffle != SDValue())
6210 return shuffle;
6211 }
6212
6213 if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
6214 // If we haven't found an efficient lowering, try splitting a 128-bit vector
6215 // into two 64-bit vectors; we might discover a better way to lower it.
6216 SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
6217 EVT ExtVT = VT.getVectorElementType();
6218 EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
6219 SDValue Lower =
6220 DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
6221 if (Lower.getOpcode() == ISD::BUILD_VECTOR)
6222 Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
6223 SDValue Upper = DAG.getBuildVector(
6224 HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
6225 if (Upper.getOpcode() == ISD::BUILD_VECTOR)
6226 Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
6227 if (Lower && Upper)
6228 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
6229 }
6230
6231 // Vectors with 32- or 64-bit elements can be built by directly assigning
6232 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
6233 // will be legalized.
6234 if (EltSize >= 32) {
6235 // Do the expansion with floating-point types, since that is what the VFP
6236 // registers are defined to use, and since i64 is not legal.
6237 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6238 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6239 SmallVector<SDValue, 8> Ops;
6240 for (unsigned i = 0; i < NumElts; ++i)
6241 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i)));
6242 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6243 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6244 }
6245
6246 // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we
6247 // know the default expansion would otherwise fall back on something even
6248 // worse. For a vector with one or two non-undef values, that's
6249 // scalar_to_vector for the elements followed by a shuffle (provided the
6250 // shuffle is valid for the target) and materialization element by element
6251 // on the stack followed by a load for everything else.
6252 if (!isConstant && !usesOnlyOneValue) {
6253 SDValue Vec = DAG.getUNDEF(VT);
6254 for (unsigned i = 0 ; i < NumElts; ++i) {
6255 SDValue V = Op.getOperand(i);
6256 if (V.isUndef())
6257 continue;
6258 SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i32);
6259 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
6260 }
6261 return Vec;
6262 }
6263
6264 return SDValue();
6265}
6266
6267// Gather data to see if the operation can be modelled as a
6268// shuffle in combination with VEXTs.
6269SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
6270 SelectionDAG &DAG) const {
6271 assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
6272 SDLoc dl(Op);
6273 EVT VT = Op.getValueType();
6274 unsigned NumElts = VT.getVectorNumElements();
6275
6276 struct ShuffleSourceInfo {
6277 SDValue Vec;
6278 unsigned MinElt = std::numeric_limits<unsigned>::max();
6279 unsigned MaxElt = 0;
6280
6281 // We may insert some combination of BITCASTs and VEXT nodes to force Vec to
6282 // be compatible with the shuffle we intend to construct. As a result
6283 // ShuffleVec will be some sliding window into the original Vec.
6284 SDValue ShuffleVec;
6285
6286 // Code should guarantee that element i in Vec starts at element "WindowBase
6287 // + i * WindowScale in ShuffleVec".
6288 int WindowBase = 0;
6289 int WindowScale = 1;
6290
6291 ShuffleSourceInfo(SDValue Vec) : Vec(Vec), ShuffleVec(Vec) {}
6292
6293 bool operator ==(SDValue OtherVec) { return Vec == OtherVec; }
6294 };
6295
6296 // First gather all vectors used as an immediate source for this BUILD_VECTOR
6297 // node.
6298 SmallVector<ShuffleSourceInfo, 2> Sources;
6299 for (unsigned i = 0; i < NumElts; ++i) {
6300 SDValue V = Op.getOperand(i);
6301 if (V.isUndef())
6302 continue;
6303 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
6304 // A shuffle can only come from building a vector from various
6305 // elements of other vectors.
6306 return SDValue();
6307 } else if (!isa<ConstantSDNode>(V.getOperand(1))) {
6308 // Furthermore, shuffles require a constant mask, whereas extractelts
6309 // accept variable indices.
6310 return SDValue();
6311 }
6312
6313 // Add this element source to the list if it's not already there.
6314 SDValue SourceVec = V.getOperand(0);
6315 auto Source = llvm::find(Sources, SourceVec);
6316 if (Source == Sources.end())
6317 Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
6318
6319 // Update the minimum and maximum lane number seen.
6320 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
6321 Source->MinElt = std::min(Source->MinElt, EltNo);
6322 Source->MaxElt = std::max(Source->MaxElt, EltNo);
6323 }
6324
6325 // Currently only do something sane when at most two source vectors
6326 // are involved.
6327 if (Sources.size() > 2)
6328 return SDValue();
6329
6330 // Find out the smallest element size among result and two sources, and use
6331 // it as element size to build the shuffle_vector.
6332 EVT SmallestEltTy = VT.getVectorElementType();
6333 for (auto &Source : Sources) {
6334 EVT SrcEltTy = Source.Vec.getValueType().getVectorElementType();
6335 if (SrcEltTy.bitsLT(SmallestEltTy))
6336 SmallestEltTy = SrcEltTy;
6337 }
6338 unsigned ResMultiplier =
6339 VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
6340 NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
6341 EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
6342
6343 // If the source vector is too wide or too narrow, we may nevertheless be able
6344 // to construct a compatible shuffle either by concatenating it with UNDEF or
6345 // extracting a suitable range of elements.
6346 for (auto &Src : Sources) {
6347 EVT SrcVT = Src.ShuffleVec.getValueType();
6348
6349 if (SrcVT.getSizeInBits() == VT.getSizeInBits())
6350 continue;
6351
6352 // This stage of the search produces a source with the same element type as
6353 // the original, but with a total width matching the BUILD_VECTOR output.
6354 EVT EltVT = SrcVT.getVectorElementType();
6355 unsigned NumSrcElts = VT.getSizeInBits() / EltVT.getSizeInBits();
6356 EVT DestVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumSrcElts);
6357
6358 if (SrcVT.getSizeInBits() < VT.getSizeInBits()) {
6359 if (2 * SrcVT.getSizeInBits() != VT.getSizeInBits())
6360 return SDValue();
6361 // We can pad out the smaller vector for free, so if it's part of a
6362 // shuffle...
6363 Src.ShuffleVec =
6364 DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
6365 DAG.getUNDEF(Src.ShuffleVec.getValueType()));
6366 continue;
6367 }
6368
6369 if (SrcVT.getSizeInBits() != 2 * VT.getSizeInBits())
6370 return SDValue();
6371
6372 if (Src.MaxElt - Src.MinElt >= NumSrcElts) {
6373 // Span too large for a VEXT to cope
6374 return SDValue();
6375 }
6376
6377 if (Src.MinElt >= NumSrcElts) {
6378 // The extraction can just take the second half
6379 Src.ShuffleVec =
6380 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6381 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6382 Src.WindowBase = -NumSrcElts;
6383 } else if (Src.MaxElt < NumSrcElts) {
6384 // The extraction can just take the first half
6385 Src.ShuffleVec =
6386 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6387 DAG.getConstant(0, dl, MVT::i32));
6388 } else {
6389 // An actual VEXT is needed
6390 SDValue VEXTSrc1 =
6391 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6392 DAG.getConstant(0, dl, MVT::i32));
6393 SDValue VEXTSrc2 =
6394 DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
6395 DAG.getConstant(NumSrcElts, dl, MVT::i32));
6396
6397 Src.ShuffleVec = DAG.getNode(ARMISD::VEXT, dl, DestVT, VEXTSrc1,
6398 VEXTSrc2,
6399 DAG.getConstant(Src.MinElt, dl, MVT::i32));
6400 Src.WindowBase = -Src.MinElt;
6401 }
6402 }
6403
6404 // Another possible incompatibility occurs from the vector element types. We
6405 // can fix this by bitcasting the source vectors to the same type we intend
6406 // for the shuffle.
6407 for (auto &Src : Sources) {
6408 EVT SrcEltTy = Src.ShuffleVec.getValueType().getVectorElementType();
6409 if (SrcEltTy == SmallestEltTy)
6410 continue;
6411 assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
6412 Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
6413 Src.WindowScale = SrcEltTy.getSizeInBits() / SmallestEltTy.getSizeInBits();
6414 Src.WindowBase *= Src.WindowScale;
6415 }
6416
6417 // Final sanity check before we try to actually produce a shuffle.
6418 DEBUG(
6419 for (auto Src : Sources)
6420 assert(Src.ShuffleVec.getValueType() == ShuffleVT);
6421 );
6422
6423 // The stars all align, our next step is to produce the mask for the shuffle.
6424 SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
6425 int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
6426 for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
6427 SDValue Entry = Op.getOperand(i);
6428 if (Entry.isUndef())
6429 continue;
6430
6431 auto Src = llvm::find(Sources, Entry.getOperand(0));
6432 int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
6433
6434 // EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
6435 // trunc. So only std::min(SrcBits, DestBits) actually get defined in this
6436 // segment.
6437 EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
6438 int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
6439 VT.getScalarSizeInBits());
6440 int LanesDefined = BitsDefined / BitsPerShuffleLane;
6441
6442 // This source is expected to fill ResMultiplier lanes of the final shuffle,
6443 // starting at the appropriate offset.
6444 int *LaneMask = &Mask[i * ResMultiplier];
6445
6446 int ExtractBase = EltNo * Src->WindowScale + Src->WindowBase;
6447 ExtractBase += NumElts * (Src - Sources.begin());
6448 for (int j = 0; j < LanesDefined; ++j)
6449 LaneMask[j] = ExtractBase + j;
6450 }
6451
6452 // Final check before we try to produce nonsense...
6453 if (!isShuffleMaskLegal(Mask, ShuffleVT))
6454 return SDValue();
6455
6456 // We can't handle more than two sources. This should have already
6457 // been checked before this point.
6458 assert(Sources.size() <= 2 && "Too many sources!");
6459
6460 SDValue ShuffleOps[] = { DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT) };
6461 for (unsigned i = 0; i < Sources.size(); ++i)
6462 ShuffleOps[i] = Sources[i].ShuffleVec;
6463
6464 SDValue Shuffle = DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0],
6465 ShuffleOps[1], Mask);
6466 return DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
6467}
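A minimal standalone sketch of the window bookkeeping above, outside the LLVM API: the names SourceWindow and maskIndex are illustrative only. It reproduces the ExtractBase arithmetic (EltNo * WindowScale + WindowBase, plus an offset of NumElts for the second source) that fills the shuffle mask.

#include <cassert>
#include <iostream>

struct SourceWindow {
  int WindowBase = 0;  // offset introduced by a VEXT / EXTRACT_SUBVECTOR
  int WindowScale = 1; // lanes per original element after the final bitcast
};

// NumShuffleLanes plays the role of NumElts in ReconstructShuffle: the lane
// count of the common ShuffleVT. SrcIdx is 0 or 1, the position in Sources.
int maskIndex(const SourceWindow &Src, int EltNo, int SrcIdx,
              int NumShuffleLanes) {
  int ExtractBase = EltNo * Src.WindowScale + Src.WindowBase;
  return ExtractBase + NumShuffleLanes * SrcIdx; // second source follows the first
}

int main() {
  // Suppose the second source needed a VEXT starting at MinElt = 2
  // (WindowBase = -2) and was then bitcast to half-width lanes
  // (WindowScale = 2), which also doubles the base to -4.
  SourceWindow S{-4, 2};
  assert(maskIndex(S, /*EltNo=*/3, /*SrcIdx=*/1, /*NumShuffleLanes=*/8) == 10);
  std::cout << maskIndex(S, 3, 1, 8) << "\n"; // prints 10
}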
6468
6469/// isShuffleMaskLegal - Targets can use this to indicate that they only
6470/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
6471/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
6472/// are assumed to be legal.
6473bool
6474ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
6475 EVT VT) const {
6476 if (VT.getVectorNumElements() == 4 &&
6477 (VT.is128BitVector() || VT.is64BitVector())) {
6478 unsigned PFIndexes[4];
6479 for (unsigned i = 0; i != 4; ++i) {
6480 if (M[i] < 0)
6481 PFIndexes[i] = 8;
6482 else
6483 PFIndexes[i] = M[i];
6484 }
6485
6486 // Compute the index in the perfect shuffle table.
6487 unsigned PFTableIndex =
6488 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6489 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6490 unsigned Cost = (PFEntry >> 30);
6491
6492 if (Cost <= 4)
6493 return true;
6494 }
6495
6496 bool ReverseVEXT, isV_UNDEF;
6497 unsigned Imm, WhichResult;
6498
6499 unsigned EltSize = VT.getScalarSizeInBits();
6500 return (EltSize >= 32 ||
6501 ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
6502 isVREVMask(M, VT, 64) ||
6503 isVREVMask(M, VT, 32) ||
6504 isVREVMask(M, VT, 16) ||
6505 isVEXTMask(M, VT, ReverseVEXT, Imm) ||
6506 isVTBLMask(M, VT) ||
6507 isNEONTwoResultShuffleMask(M, VT, WhichResult, isV_UNDEF) ||
6508 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT)));
6509}
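The perfect-shuffle lookup above packs four mask entries into a base-9 index (an entry of 8 stands for an undef lane) and keeps the cost in the top bits of each table entry. A small self-contained sketch of just the index computation, assuming nothing about the table contents:

#include <cassert>

unsigned perfectShuffleIndex(const int M[4]) {
  unsigned Idx = 0;
  for (int i = 0; i < 4; ++i) {
    unsigned Digit = M[i] < 0 ? 8u : unsigned(M[i]); // -1 (undef) maps to 8
    Idx = Idx * 9 + Digit;                           // base-9 accumulate
  }
  return Idx; // equals PFIndexes[0]*9*9*9 + PFIndexes[1]*9*9 + PFIndexes[2]*9 + PFIndexes[3]
}

int main() {
  int Identity[4] = {0, 1, 2, 3};
  assert(perfectShuffleIndex(Identity) == 102); // ((0*9+1)*9+2)*9+3
  int AllUndef[4] = {-1, -1, -1, -1};
  assert(perfectShuffleIndex(AllUndef) == 6560); // all four digits are 8
}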
6510
6511/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
6512/// the specified operations to build the shuffle.
6513static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
6514 SDValue RHS, SelectionDAG &DAG,
6515 const SDLoc &dl) {
6516 unsigned OpNum = (PFEntry >> 26) & 0x0F;
6517 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
6518 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);
6519
6520 enum {
6521 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
6522 OP_VREV,
6523 OP_VDUP0,
6524 OP_VDUP1,
6525 OP_VDUP2,
6526 OP_VDUP3,
6527 OP_VEXT1,
6528 OP_VEXT2,
6529 OP_VEXT3,
6530 OP_VUZPL, // VUZP, left result
6531 OP_VUZPR, // VUZP, right result
6532 OP_VZIPL, // VZIP, left result
6533 OP_VZIPR, // VZIP, right result
6534 OP_VTRNL, // VTRN, left result
6535 OP_VTRNR // VTRN, right result
6536 };
6537
6538 if (OpNum == OP_COPY) {
6539 if (LHSID == (1*9+2)*9+3) return LHS;
6540 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
6541 return RHS;
6542 }
6543
6544 SDValue OpLHS, OpRHS;
6545 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
6546 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);
6547 EVT VT = OpLHS.getValueType();
6548
6549 switch (OpNum) {
6550 default: llvm_unreachable("Unknown shuffle opcode!");
6551 case OP_VREV:
6552 // VREV divides the vector in half and swaps within the half.
6553 if (VT.getVectorElementType() == MVT::i32 ||
6554 VT.getVectorElementType() == MVT::f32)
6555 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS);
6556 // vrev <4 x i16> -> VREV32
6557 if (VT.getVectorElementType() == MVT::i16)
6558 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS);
6559 // vrev <4 x i8> -> VREV16
6560 assert(VT.getVectorElementType() == MVT::i8);
6561 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS);
6562 case OP_VDUP0:
6563 case OP_VDUP1:
6564 case OP_VDUP2:
6565 case OP_VDUP3:
6566 return DAG.getNode(ARMISD::VDUPLANE, dl, VT,
6567 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, dl, MVT::i32));
6568 case OP_VEXT1:
6569 case OP_VEXT2:
6570 case OP_VEXT3:
6571 return DAG.getNode(ARMISD::VEXT, dl, VT,
6572 OpLHS, OpRHS,
6573 DAG.getConstant(OpNum - OP_VEXT1 + 1, dl, MVT::i32));
6574 case OP_VUZPL:
6575 case OP_VUZPR:
6576 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
6577 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL);
6578 case OP_VZIPL:
6579 case OP_VZIPR:
6580 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
6581 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL);
6582 case OP_VTRNL:
6583 case OP_VTRNR:
6584 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
6585 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL);
6586 }
6587}
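As the shifts in GeneratePerfectShuffle read it, a PerfectShuffleTable entry holds the cost in bits [31:30], the opcode in bits [29:26], and two 13-bit sub-shuffle ids (LHSID, RHSID) below that. A sketch of the unpacking with a synthetic entry (not a value from the real table):

#include <cassert>
#include <cstdint>

struct PFDecoded { unsigned Cost, OpNum, LHSID, RHSID; };

PFDecoded decodePFEntry(uint32_t PFEntry) {
  PFDecoded D;
  D.Cost  = PFEntry >> 30;                      // 0..3
  D.OpNum = (PFEntry >> 26) & 0x0F;             // OP_COPY, OP_VREV, ...
  D.LHSID = (PFEntry >> 13) & ((1u << 13) - 1); // recursive sub-shuffle ids
  D.RHSID = PFEntry & ((1u << 13) - 1);
  return D;
}

int main() {
  uint32_t Entry = (2u << 30) | (5u << 26) | (102u << 13) | 3u; // synthetic
  PFDecoded D = decodePFEntry(Entry);
  assert(D.Cost == 2 && D.OpNum == 5 && D.LHSID == 102 && D.RHSID == 3);
}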
6588
6589static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
6590 ArrayRef<int> ShuffleMask,
6591 SelectionDAG &DAG) {
6592 // Check to see if we can use the VTBL instruction.
6593 SDValue V1 = Op.getOperand(0);
6594 SDValue V2 = Op.getOperand(1);
6595 SDLoc DL(Op);
6596
6597 SmallVector<SDValue, 8> VTBLMask;
6598 for (ArrayRef<int>::iterator
6599 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
6600 VTBLMask.push_back(DAG.getConstant(*I, DL, MVT::i32));
6601
6602 if (V2.getNode()->isUndef())
6603 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
6604 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6605
6606 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
6607 DAG.getBuildVector(MVT::v8i8, DL, VTBLMask));
6608}
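For reference, a rough scalar model of the byte table lookup that VTBL1/VTBL2 performs, which is why an arbitrary v8i8 mask can be lowered this way: each output byte selects a table byte by index, and indices past the end of the table produce zero. The helper below is an illustration, not the NEON intrinsic.

#include <array>
#include <cassert>
#include <cstdint>

std::array<uint8_t, 8> vtblModel(const std::array<uint8_t, 16> &Table,
                                 unsigned TableLen, // 8 for VTBL1, 16 for VTBL2
                                 const std::array<uint8_t, 8> &Mask) {
  std::array<uint8_t, 8> Out{};
  for (int i = 0; i < 8; ++i)
    Out[i] = Mask[i] < TableLen ? Table[Mask[i]] : uint8_t(0); // OOB lanes -> 0
  return Out;
}

int main() {
  std::array<uint8_t, 16> T{};
  for (int i = 0; i < 16; ++i) T[i] = uint8_t(10 + i);
  std::array<uint8_t, 8> M{7, 6, 5, 4, 3, 2, 1, 0}; // reverse the low half
  auto R = vtblModel(T, 8, M);
  assert(R[0] == 17 && R[7] == 10);
}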
6609
6610static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op,
6611 SelectionDAG &DAG) {
6612 SDLoc DL(Op);
6613 SDValue OpLHS = Op.getOperand(0);
6614 EVT VT = OpLHS.getValueType();
6615
6616 assert((VT == MVT::v8i16 || VT == MVT::v16i8) &&
6617 "Expect an v8i16/v16i8 type");
6618 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS);
6619 // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now,
6620 // extract the first 8 bytes into the top double word and the last 8 bytes
6621 // into the bottom double word. The v8i16 case is similar.
6622 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4;
6623 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS,
6624 DAG.getConstant(ExtractNum, DL, MVT::i32));
6625}
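A scalar trace of the two-step reversal above for v16i8, under the usual NEON semantics (VREV64 reverses bytes within each 64-bit half; VEXT #8 takes bytes 8..23 of the concatenation of its operands). Together they reverse all sixteen lanes:

#include <array>
#include <cassert>
#include <cstdint>

using V16 = std::array<uint8_t, 16>;

V16 vrev64(const V16 &V) {              // reverse within each doubleword
  V16 R{};
  for (int half = 0; half < 2; ++half)
    for (int i = 0; i < 8; ++i)
      R[half * 8 + i] = V[half * 8 + (7 - i)];
  return R;
}

V16 vext8(const V16 &A, const V16 &B) { // byte i of result = concat(A,B)[i+8]
  V16 R{};
  for (int i = 0; i < 16; ++i)
    R[i] = (i + 8 < 16) ? A[i + 8] : B[i - 8];
  return R;
}

int main() {
  V16 In{};
  for (int i = 0; i < 16; ++i) In[i] = uint8_t(i);
  V16 Rev = vrev64(In);
  V16 Out = vext8(Rev, Rev);            // same operand twice, as in the lowering
  for (int i = 0; i < 16; ++i)
    assert(Out[i] == 15 - i);           // fully reversed
}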
6626
6627static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
6628 SDValue V1 = Op.getOperand(0);
6629 SDValue V2 = Op.getOperand(1);
6630 SDLoc dl(Op);
6631 EVT VT = Op.getValueType();
6632 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
6633
6634 // Convert shuffles that are directly supported on NEON to target-specific
6635 // DAG nodes, instead of keeping them as shuffles and matching them again
6636 // during code selection. This is more efficient and avoids the possibility
6637 // of inconsistencies between legalization and selection.
6638 // FIXME: floating-point vectors should be canonicalized to integer vectors
6639 // of the same size so that they get CSEd properly.
6640 ArrayRef<int> ShuffleMask = SVN->getMask();
6641
6642 unsigned EltSize = VT.getScalarSizeInBits();
6643 if (EltSize <= 32) {
6644 if (SVN->isSplat()) {
6645 int Lane = SVN->getSplatIndex();
6646 // If this is undef splat, generate it via "just" vdup, if possible.
6647 if (Lane == -1) Lane = 0;
6648
6649 // Test if V1 is a SCALAR_TO_VECTOR.
6650 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
6651 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6652 }
6653 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR
6654 // (and probably will turn into a SCALAR_TO_VECTOR once legalization
6655 // reaches it).
6656 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
6657 !isa<ConstantSDNode>(V1.getOperand(0))) {
6658 bool IsScalarToVector = true;
6659 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
6660 if (!V1.getOperand(i).isUndef()) {
6661 IsScalarToVector = false;
6662 break;
6663 }
6664 if (IsScalarToVector)
6665 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
6666 }
6667 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
6668 DAG.getConstant(Lane, dl, MVT::i32));
6669 }
6670
6671 bool ReverseVEXT;
6672 unsigned Imm;
6673 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
6674 if (ReverseVEXT)
6675 std::swap(V1, V2);
6676 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
6677 DAG.getConstant(Imm, dl, MVT::i32));
6678 }
6679
6680 if (isVREVMask(ShuffleMask, VT, 64))
6681 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
6682 if (isVREVMask(ShuffleMask, VT, 32))
6683 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
6684 if (isVREVMask(ShuffleMask, VT, 16))
6685 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
6686
6687 if (V2->isUndef() && isSingletonVEXTMask(ShuffleMask, VT, Imm)) {
6688 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1,
6689 DAG.getConstant(Imm, dl, MVT::i32));
6690 }
6691
6692 // Check for Neon shuffles that modify both input vectors in place.
6693 // If both results are used, i.e., if there are two shuffles with the same
6694 // source operands and with masks corresponding to both results of one of
6695 // these operations, DAG memoization will ensure that a single node is
6696 // used for both shuffles.
6697 unsigned WhichResult;
6698 bool isV_UNDEF;
6699 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6700 ShuffleMask, VT, WhichResult, isV_UNDEF)) {
6701 if (isV_UNDEF)
6702 V2 = V1;
6703 return DAG.getNode(ShuffleOpc, dl, DAG.getVTList(VT, VT), V1, V2)
6704 .getValue(WhichResult);
6705 }
6706
6707 // Also check for these shuffles through CONCAT_VECTORS: we canonicalize
6708 // shuffles that produce a result larger than their operands with:
6709 // shuffle(concat(v1, undef), concat(v2, undef))
6710 // ->
6711 // shuffle(concat(v1, v2), undef)
6712 // because we can access quad vectors (see PerformVECTOR_SHUFFLECombine).
6713 //
6714 // This is useful in the general case, but there are special cases where
6715 // native shuffles produce larger results: the two-result ops.
6716 //
6717 // Look through the concat when lowering them:
6718 // shuffle(concat(v1, v2), undef)
6719 // ->
6720 // concat(VZIP(v1, v2):0, :1)
6721 //
6722 if (V1->getOpcode() == ISD::CONCAT_VECTORS && V2->isUndef()) {
6723 SDValue SubV1 = V1->getOperand(0);
6724 SDValue SubV2 = V1->getOperand(1);
6725 EVT SubVT = SubV1.getValueType();
6726
6727 // We expect these to have been canonicalized to -1.
6728 assert(llvm::all_of(ShuffleMask, [&](int i) {
6729 return i < (int)VT.getVectorNumElements();
6730 }) && "Unexpected shuffle index into UNDEF operand!");
6731
6732 if (unsigned ShuffleOpc = isNEONTwoResultShuffleMask(
6733 ShuffleMask, SubVT, WhichResult, isV_UNDEF)) {
6734 if (isV_UNDEF)
6735 SubV2 = SubV1;
6736 assert((WhichResult == 0) &&
6737 "In-place shuffle of concat can only have one result!");
6738 SDValue Res = DAG.getNode(ShuffleOpc, dl, DAG.getVTList(SubVT, SubVT),
6739 SubV1, SubV2);
6740 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Res.getValue(0),
6741 Res.getValue(1));
6742 }
6743 }
6744 }
6745
6746 // If the shuffle is not directly supported and it has 4 elements, use
6747 // the PerfectShuffle-generated table to synthesize it from other shuffles.
6748 unsigned NumElts = VT.getVectorNumElements();
6749 if (NumElts == 4) {
6750 unsigned PFIndexes[4];
6751 for (unsigned i = 0; i != 4; ++i) {
6752 if (ShuffleMask[i] < 0)
6753 PFIndexes[i] = 8;
6754 else
6755 PFIndexes[i] = ShuffleMask[i];
6756 }
6757
6758 // Compute the index in the perfect shuffle table.
6759 unsigned PFTableIndex =
6760 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
6761 unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
6762 unsigned Cost = (PFEntry >> 30);
6763
6764 if (Cost <= 4)
6765 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
6766 }
6767
6768 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs.
6769 if (EltSize >= 32) {
6770 // Do the expansion with floating-point types, since that is what the VFP
6771 // registers are defined to use, and since i64 is not legal.
6772 EVT EltVT = EVT::getFloatingPointVT(EltSize);
6773 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts);
6774 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1);
6775 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2);
6776 SmallVector<SDValue, 8> Ops;
6777 for (unsigned i = 0; i < NumElts; ++i) {
6778 if (ShuffleMask[i] < 0)
6779 Ops.push_back(DAG.getUNDEF(EltVT));
6780 else
6781 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT,
6782 ShuffleMask[i] < (int)NumElts ? V1 : V2,
6783 DAG.getConstant(ShuffleMask[i] & (NumElts-1),
6784 dl, MVT::i32)));
6785 }
6786 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, Ops);
6787 return DAG.getNode(ISD::BITCAST, dl, VT, Val);
6788 }
6789
6790 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT))
6791 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG);
6792
6793 if (VT == MVT::v8i8)
6794 if (SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG))
6795 return NewOp;
6796
6797 return SDValue();
6798}
6799
6800static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6801 // INSERT_VECTOR_ELT is legal only for immediate indexes.
6802 SDValue Lane = Op.getOperand(2);
6803 if (!isa<ConstantSDNode>(Lane))
6804 return SDValue();
6805
6806 return Op;
6807}
6808
6809static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
6810 // EXTRACT_VECTOR_ELT is legal only for immediate indexes.
6811 SDValue Lane = Op.getOperand(1);
6812 if (!isa<ConstantSDNode>(Lane))
6813 return SDValue();
6814
6815 SDValue Vec = Op.getOperand(0);
6816 if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
6817 SDLoc dl(Op);
6818 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
6819 }
6820
6821 return Op;
6822}
6823
6824static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) {
6825 // The only time a CONCAT_VECTORS operation can have legal types is when
6826 // two 64-bit vectors are concatenated to a 128-bit vector.
6827 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 &&
6828 "unexpected CONCAT_VECTORS");
6829 SDLoc dl(Op);
6830 SDValue Val = DAG.getUNDEF(MVT::v2f64);
6831 SDValue Op0 = Op.getOperand(0);
6832 SDValue Op1 = Op.getOperand(1);
6833 if (!Op0.isUndef())
6834 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6835 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0),
6836 DAG.getIntPtrConstant(0, dl));
6837 if (!Op1.isUndef())
6838 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val,
6839 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1),
6840 DAG.getIntPtrConstant(1, dl));
6841 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val);
6842}
6843
6844/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each
6845/// element has been zero/sign-extended, depending on the isSigned parameter,
6846/// from an integer type half its size.
6847static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
6848 bool isSigned) {
6849 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32.
6850 EVT VT = N->getValueType(0);
6851 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) {
6852 SDNode *BVN = N->getOperand(0).getNode();
6853 if (BVN->getValueType(0) != MVT::v4i32 ||
6854 BVN->getOpcode() != ISD::BUILD_VECTOR)
6855 return false;
6856 unsigned LoElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
6857 unsigned HiElt = 1 - LoElt;
6858 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt));
6859 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt));
6860 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2));
6861 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2));
6862 if (!Lo0 || !Hi0 || !Lo1 || !Hi1)
6863 return false;
6864 if (isSigned) {
6865 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 &&
6866 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32)
6867 return true;
6868 } else {
6869 if (Hi0->isNullValue() && Hi1->isNullValue())
6870 return true;
6871 }
6872 return false;
6873 }
6874
6875 if (N->getOpcode() != ISD::BUILD_VECTOR)
6876 return false;
6877
6878 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
6879 SDNode *Elt = N->getOperand(i).getNode();
6880 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
6881 unsigned EltSize = VT.getScalarSizeInBits();
6882 unsigned HalfSize = EltSize / 2;
6883 if (isSigned) {
6884 if (!isIntN(HalfSize, C->getSExtValue()))
6885 return false;
6886 } else {
6887 if (!isUIntN(HalfSize, C->getZExtValue()))
6888 return false;
6889 }
6890 continue;
6891 }
6892 return false;
6893 }
6894
6895 return true;
6896}
6897
6898/// isSignExtended - Check if a node is a vector value that is sign-extended
6899/// or a constant BUILD_VECTOR with sign-extended elements.
6900static bool isSignExtended(SDNode *N, SelectionDAG &DAG) {
6901 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N))
6902 return true;
6903 if (isExtendedBUILD_VECTOR(N, DAG, true))
6904 return true;
6905 return false;
6906}
6907
6908/// isZeroExtended - Check if a node is a vector value that is zero-extended
6909/// or a constant BUILD_VECTOR with zero-extended elements.
6910static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) {
6911 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N))
6912 return true;
6913 if (isExtendedBUILD_VECTOR(N, DAG, false))
6914 return true;
6915 return false;
6916}
6917
6918static EVT getExtensionTo64Bits(const EVT &OrigVT) {
6919 if (OrigVT.getSizeInBits() >= 64)
6920 return OrigVT;
6921
6922 assert(OrigVT.isSimple() && "Expecting a simple value type");
6923
6924 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy;
6925 switch (OrigSimpleTy) {
6926 default: llvm_unreachable("Unexpected Vector Type");
6927 case MVT::v2i8:
6928 case MVT::v2i16:
6929 return MVT::v2i32;
6930 case MVT::v4i8:
6931 return MVT::v4i16;
6932 }
6933}
6934
6935/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total
6936/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL.
6937/// We insert the required extension here to get the vector to fill a D register.
6938static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG,
6939 const EVT &OrigTy,
6940 const EVT &ExtTy,
6941 unsigned ExtOpcode) {
6942 // The vector originally had a size of OrigTy. It was then extended to ExtTy.
6943 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than
6944 // 64-bits we need to insert a new extension so that it will be 64-bits.
6945 assert(ExtTy.is128BitVector() && "Unexpected extension size");
6946 if (OrigTy.getSizeInBits() >= 64)
6947 return N;
6948
6949 // Must extend size to at least 64 bits to be used as an operand for VMULL.
6950 EVT NewVT = getExtensionTo64Bits(OrigTy);
6951
6952 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N);
6953}
6954
6955/// SkipLoadExtensionForVMULL - return a load of the original vector size that
6956/// does not do any sign/zero extension. If the original vector is less
6957/// than 64 bits, an appropriate extension will be added after the load to
6958/// reach a total size of 64 bits. We have to add the extension separately
6959/// because ARM does not have a sign/zero extending load for vectors.
6960static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) {
6961 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT());
6962
6963 // The load already has the right type.
6964 if (ExtendedTy == LD->getMemoryVT())
6965 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(),
6966 LD->getBasePtr(), LD->getPointerInfo(),
6967 LD->getAlignment(), LD->getMemOperand()->getFlags());
6968
6969 // We need to create a zextload/sextload. We cannot just create a load
6970 // followed by a zext/zext node because LowerMUL is also run during normal
6971 // operation legalization where we can't create illegal types.
6972 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy,
6973 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(),
6974 LD->getMemoryVT(), LD->getAlignment(),
6975 LD->getMemOperand()->getFlags());
6976}
6977
6978/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND,
6979/// extending load, or BUILD_VECTOR with extended elements, return the
6980/// unextended value. The unextended vector should be 64 bits so that it can
6981/// be used as an operand to a VMULL instruction. If the original vector size
6982 /// before extension is less than 64 bits we add an extension to resize
6983/// the vector to 64 bits.
6984static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
6985 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND)
6986 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG,
6987 N->getOperand(0)->getValueType(0),
6988 N->getValueType(0),
6989 N->getOpcode());
6990
6991 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
6992 return SkipLoadExtensionForVMULL(LD, DAG);
6993
6994 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will
6995 // have been legalized as a BITCAST from v4i32.
6996 if (N->getOpcode() == ISD::BITCAST) {
6997 SDNode *BVN = N->getOperand(0).getNode();
6998 assert(BVN->getOpcode() == ISD::BUILD_VECTOR &&
6999 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR");
7000 unsigned LowElt = DAG.getDataLayout().isBigEndian() ? 1 : 0;
7001 return DAG.getBuildVector(
7002 MVT::v2i32, SDLoc(N),
7003 {BVN->getOperand(LowElt), BVN->getOperand(LowElt + 2)});
7004 }
7005 // Construct a new BUILD_VECTOR with elements truncated to half the size.
7006 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
7007 EVT VT = N->getValueType(0);
7008 unsigned EltSize = VT.getScalarSizeInBits() / 2;
7009 unsigned NumElts = VT.getVectorNumElements();
7010 MVT TruncVT = MVT::getIntegerVT(EltSize);
7011 SmallVector<SDValue, 8> Ops;
7012 SDLoc dl(N);
7013 for (unsigned i = 0; i != NumElts; ++i) {
7014 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i));
7015 const APInt &CInt = C->getAPIntValue();
7016 // Element types smaller than 32 bits are not legal, so use i32 elements.
7017 // The values are implicitly truncated so sext vs. zext doesn't matter.
7018 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), dl, MVT::i32));
7019 }
7020 return DAG.getBuildVector(MVT::getVectorVT(TruncVT, NumElts), dl, Ops);
7021}
7022
7023static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) {
7024 unsigned Opcode = N->getOpcode();
7025 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7026 SDNode *N0 = N->getOperand(0).getNode();
7027 SDNode *N1 = N->getOperand(1).getNode();
7028 return N0->hasOneUse() && N1->hasOneUse() &&
7029 isSignExtended(N0, DAG) && isSignExtended(N1, DAG);
7030 }
7031 return false;
7032}
7033
7034static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) {
7035 unsigned Opcode = N->getOpcode();
7036 if (Opcode == ISD::ADD || Opcode == ISD::SUB) {
7037 SDNode *N0 = N->getOperand(0).getNode();
7038 SDNode *N1 = N->getOperand(1).getNode();
7039 return N0->hasOneUse() && N1->hasOneUse() &&
7040 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG);
7041 }
7042 return false;
7043}
7044
7045static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) {
7046 // Multiplications are only custom-lowered for 128-bit vectors so that
7047 // VMULL can be detected. Otherwise v2i64 multiplications are not legal.
7048 EVT VT = Op.getValueType();
7049 assert(VT.is128BitVector() && VT.isInteger() &&
7050 "unexpected type for custom-lowering ISD::MUL");
7051 SDNode *N0 = Op.getOperand(0).getNode();
7052 SDNode *N1 = Op.getOperand(1).getNode();
7053 unsigned NewOpc = 0;
7054 bool isMLA = false;
7055 bool isN0SExt = isSignExtended(N0, DAG);
7056 bool isN1SExt = isSignExtended(N1, DAG);
7057 if (isN0SExt && isN1SExt)
7058 NewOpc = ARMISD::VMULLs;
7059 else {
7060 bool isN0ZExt = isZeroExtended(N0, DAG);
7061 bool isN1ZExt = isZeroExtended(N1, DAG);
7062 if (isN0ZExt && isN1ZExt)
7063 NewOpc = ARMISD::VMULLu;
7064 else if (isN1SExt || isN1ZExt) {
7065 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these
7066 // into (s/zext A * s/zext C) + (s/zext B * s/zext C)
7067 if (isN1SExt && isAddSubSExt(N0, DAG)) {
7068 NewOpc = ARMISD::VMULLs;
7069 isMLA = true;
7070 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) {
7071 NewOpc = ARMISD::VMULLu;
7072 isMLA = true;
7073 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) {
7074 std::swap(N0, N1);
7075 NewOpc = ARMISD::VMULLu;
7076 isMLA = true;
7077 }
7078 }
7079
7080 if (!NewOpc) {
7081 if (VT == MVT::v2i64)
7082 // Fall through to expand this. It is not legal.
7083 return SDValue();
7084 else
7085 // Other vector multiplications are legal.
7086 return Op;
7087 }
7088 }
7089
7090 // Legalize to a VMULL instruction.
7091 SDLoc DL(Op);
7092 SDValue Op0;
7093 SDValue Op1 = SkipExtensionForVMULL(N1, DAG);
7094 if (!isMLA) {
7095 Op0 = SkipExtensionForVMULL(N0, DAG);
7096 assert(Op0.getValueType().is64BitVector() &&
7097 Op1.getValueType().is64BitVector() &&
7098 "unexpected types for extended operands to VMULL");
7099 return DAG.getNode(NewOpc, DL, VT, Op0, Op1);
7100 }
7101
7102 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during
7103 // isel lowering to take advantage of no-stall back to back vmul + vmla.
7104 // vmull q0, d4, d6
7105 // vmlal q0, d5, d6
7106 // is faster than
7107 // vaddl q0, d4, d5
7108 // vmovl q1, d6
7109 // vmul q0, q0, q1
7110 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG);
7111 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG);
7112 EVT Op1VT = Op1.getValueType();
7113 return DAG.getNode(N0->getOpcode(), DL, VT,
7114 DAG.getNode(NewOpc, DL, VT,
7115 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1),
7116 DAG.getNode(NewOpc, DL, VT,
7117 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1));
7118}
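The isMLA path relies on the distributive law over the widened type: (ext A + ext B) * ext C equals ext A * ext C + ext B * ext C, which is why the multiply can be re-emitted as a VMULL feeding a VMLAL. A scalar spot-check of that identity for sign-extended 8-bit inputs widened to 64 bits:

#include <cassert>
#include <cstdint>

int main() {
  for (int a = -128; a < 128; a += 31)
    for (int b = -128; b < 128; b += 37)
      for (int c = -128; c < 128; c += 41) {
        int64_t Together = (int64_t(a) + int64_t(b)) * int64_t(c);
        int64_t Split    = int64_t(a) * int64_t(c) + int64_t(b) * int64_t(c);
        assert(Together == Split); // VMULL + VMLAL yields the same value
      }
}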
7119
7120static SDValue LowerSDIV_v4i8(SDValue X, SDValue Y, const SDLoc &dl,
7121 SelectionDAG &DAG) {
7122 // TODO: Should this propagate fast-math-flags?
7123
7124 // Convert to float
7125 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo));
7126 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo));
7127 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X);
7128 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y);
7129 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X);
7130 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y);
7131 // Get reciprocal estimate.
7132 // float4 recip = vrecpeq_f32(yf);
7133 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7134 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7135 Y);
7136 // Because char has a smaller range than uchar, we can actually get away
7137 // without any newton steps. This requires that we use a weird bias
7138 // of 0xb000, however (again, this has been exhaustively tested).
7139 // float4 result = as_float4(as_int4(xf*recip) + 0xb000);
7140 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y);
7141 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X);
7142 Y = DAG.getConstant(0xb000, dl, MVT::v4i32);
7143 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y);
7144 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X);
7145 // Convert back to short.
7146 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X);
7147 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X);
7148 return X;
7149}
7150
7151static SDValue LowerSDIV_v4i16(SDValue N0, SDValue N1, const SDLoc &dl,
7152 SelectionDAG &DAG) {
7153 // TODO: Should this propagate fast-math-flags?
7154
7155 SDValue N2;
7156 // Convert to float.
7157 // float4 yf = vcvt_f32_s32(vmovl_s16(y));
7158 // float4 xf = vcvt_f32_s32(vmovl_s16(x));
7159 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0);
7160 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1);
7161 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7162 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7163
7164 // Use reciprocal estimate and one refinement step.
7165 // float4 recip = vrecpeq_f32(yf);
7166 // recip *= vrecpsq_f32(yf, recip);
7167 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7168 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7169 N1);
7170 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7171 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7172 N1, N2);
7173 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7174 // Because short has a smaller range than ushort, we can actually get away
7175 // with only a single newton step. This requires that we use a weird bias
7176 // of 89, however (again, this has been exhaustively tested).
7177 // float4 result = as_float4(as_int4(xf*recip) + 0x89);
7178 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7179 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7180 N1 = DAG.getConstant(0x89, dl, MVT::v4i32);
7181 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7182 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7183 // Convert back to integer and return.
7184 // return vmovn_s32(vcvt_s32_f32(result));
7185 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7186 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7187 return N0;
7188}
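The division lowerings above use a hardware reciprocal estimate refined by Newton-Raphson: VRECPS computes (2 - y*r), so r *= (2 - y*r) is one refinement step toward 1/y. A scalar model of that refinement, with a crude software stand-in for VRECPE (the real instruction uses a hardware lookup table, so the exact bias constants in the code above do not carry over):

#include <cassert>
#include <cmath>

float vrecps(float Y, float R) { return 2.0f - Y * R; } // what VRECPS computes

float approxRecip(float Y, int Steps) {
  float R = 1.0f / (1.0f + std::fabs(Y));  // crude stand-in for VRECPE
  if (Y < 0.0f) R = -R;
  for (int i = 0; i < Steps; ++i)
    R = R * vrecps(Y, R);                  // Newton step: error roughly squares
  return R;
}

int main() {
  float Y = 37.0f;
  float Rough = approxRecip(Y, 0), Refined = approxRecip(Y, 2);
  assert(std::fabs(Refined * Y - 1.0f) < std::fabs(Rough * Y - 1.0f));
  assert(std::fabs(Refined * Y - 1.0f) < 1e-3f);
}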
7189
7190static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
7191 EVT VT = Op.getValueType();
7192 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7193 "unexpected type for custom-lowering ISD::SDIV");
7194
7195 SDLoc dl(Op);
7196 SDValue N0 = Op.getOperand(0);
7197 SDValue N1 = Op.getOperand(1);
7198 SDValue N2, N3;
7199
7200 if (VT == MVT::v8i8) {
7201 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
7202 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
7203
7204 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7205 DAG.getIntPtrConstant(4, dl));
7206 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7207 DAG.getIntPtrConstant(4, dl));
7208 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7209 DAG.getIntPtrConstant(0, dl));
7210 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7211 DAG.getIntPtrConstant(0, dl));
7212
7213 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
7214 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
7215
7216 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7217 N0 = LowerCONCAT_VECTORS(N0, DAG);
7218
7219 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
7220 return N0;
7221 }
7222 return LowerSDIV_v4i16(N0, N1, dl, DAG);
7223}
7224
7225static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
7226 // TODO: Should this propagate fast-math-flags?
7227 EVT VT = Op.getValueType();
7228 assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
7229 "unexpected type for custom-lowering ISD::UDIV");
7230
7231 SDLoc dl(Op);
7232 SDValue N0 = Op.getOperand(0);
7233 SDValue N1 = Op.getOperand(1);
7234 SDValue N2, N3;
7235
7236 if (VT == MVT::v8i8) {
7237 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
7238 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
7239
7240 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7241 DAG.getIntPtrConstant(4, dl));
7242 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7243 DAG.getIntPtrConstant(4, dl));
7244 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
7245 DAG.getIntPtrConstant(0, dl));
7246 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
7247 DAG.getIntPtrConstant(0, dl));
7248
7249 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16
7250 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16
7251
7252 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
7253 N0 = LowerCONCAT_VECTORS(N0, DAG);
7254
7255 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
7256 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, dl,
7257 MVT::i32),
7258 N0);
7259 return N0;
7260 }
7261
7262 // v4i16 udiv ... Convert to float.
7263 // float4 yf = vcvt_f32_s32(vmovl_u16(y));
7264 // float4 xf = vcvt_f32_s32(vmovl_u16(x));
7265 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
7266 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
7267 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
7268 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
7269
7270 // Use reciprocal estimate and two refinement steps.
7271 // float4 recip = vrecpeq_f32(yf);
7272 // recip *= vrecpsq_f32(yf, recip);
7273 // recip *= vrecpsq_f32(yf, recip);
7274 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7275 DAG.getConstant(Intrinsic::arm_neon_vrecpe, dl, MVT::i32),
7276 BN1);
7277 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7278 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7279 BN1, N2);
7280 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7281 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
7282 DAG.getConstant(Intrinsic::arm_neon_vrecps, dl, MVT::i32),
7283 BN1, N2);
7284 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
7285 // Simply multiplying by the reciprocal estimate can leave us a few ulps
7286 // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
7287 // and that it will never cause us to return an answer too large).
7288 // float4 result = as_float4(as_int4(xf*recip) + 2);
7289 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
7290 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
7291 N1 = DAG.getConstant(2, dl, MVT::v4i32);
7292 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
7293 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
7294 // Convert back to integer and return.
7295 // return vmovn_u32(vcvt_s32_f32(result));
7296 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
7297 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
7298 return N0;
7299}
7300
7301static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) {
7302 EVT VT = Op.getNode()->getValueType(0);
7303 SDVTList VTs = DAG.getVTList(VT, MVT::i32);
7304
7305 unsigned Opc;
7306 bool ExtraOp = false;
7307 switch (Op.getOpcode()) {
7308 default: llvm_unreachable("Invalid code");
7309 case ISD::ADDC: Opc = ARMISD::ADDC; break;
7310 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break;
7311 case ISD::SUBC: Opc = ARMISD::SUBC; break;
7312 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break;
7313 }
7314
7315 if (!ExtraOp)
7316 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7317 Op.getOperand(1));
7318 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0),
7319 Op.getOperand(1), Op.getOperand(2));
7320}
7321
7322SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
7323 assert(Subtarget->isTargetDarwin());
7324
7325 // For iOS, we want to call an alternative entry point: __sincos_stret;
7326 // the return values are passed via sret.
7327 SDLoc dl(Op);
7328 SDValue Arg = Op.getOperand(0);
7329 EVT ArgVT = Arg.getValueType();
7330 Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
7331 auto PtrVT = getPointerTy(DAG.getDataLayout());
7332
7333 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
7334 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
7335
7336 // Pair of floats / doubles used to pass the result.
7337 Type *RetTy = StructType::get(ArgTy, ArgTy, nullptr);
7338 auto &DL = DAG.getDataLayout();
7339
7340 ArgListTy Args;
7341 bool ShouldUseSRet = Subtarget->isAPCS_ABI();
7342 SDValue SRet;
7343 if (ShouldUseSRet) {
7344 // Create stack object for sret.
7345 const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
7346 const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
7347 int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
7348 SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
7349
7350 ArgListEntry Entry;
7351 Entry.Node = SRet;
7352 Entry.Ty = RetTy->getPointerTo();
7353 Entry.IsSExt = false;
7354 Entry.IsZExt = false;
7355 Entry.IsSRet = true;
7356 Args.push_back(Entry);
7357 RetTy = Type::getVoidTy(*DAG.getContext());
7358 }
7359
7360 ArgListEntry Entry;
7361 Entry.Node = Arg;
7362 Entry.Ty = ArgTy;
7363 Entry.IsSExt = false;
7364 Entry.IsZExt = false;
7365 Args.push_back(Entry);
7366
7367 const char *LibcallName =
7368 (ArgVT == MVT::f64) ? "__sincos_stret" : "__sincosf_stret";
7369 RTLIB::Libcall LC =
7370 (ArgVT == MVT::f64) ? RTLIB::SINCOS_F64 : RTLIB::SINCOS_F32;
7371 CallingConv::ID CC = getLibcallCallingConv(LC);
7372 SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy(DL));
7373
7374 TargetLowering::CallLoweringInfo CLI(DAG);
7375 CLI.setDebugLoc(dl)
7376 .setChain(DAG.getEntryNode())
7377 .setCallee(CC, RetTy, Callee, std::move(Args))
7378 .setDiscardResult(ShouldUseSRet);
7379 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
7380
7381 if (!ShouldUseSRet)
7382 return CallResult.first;
7383
7384 SDValue LoadSin =
7385 DAG.getLoad(ArgVT, dl, CallResult.second, SRet, MachinePointerInfo());
7386
7387 // Address of cos field.
7388 SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, SRet,
7389 DAG.getIntPtrConstant(ArgVT.getStoreSize(), dl));
7390 SDValue LoadCos =
7391 DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, MachinePointerInfo());
7392
7393 SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
7394 return DAG.getNode(ISD::MERGE_VALUES, dl, Tys,
7395 LoadSin.getValue(0), LoadCos.getValue(0));
7396}
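The sret path above expects the callee to store sin at offset 0 of the stack object and cos immediately after it (offset ArgVT.getStoreSize()). The sketch below mirrors only that memory layout with a hypothetical localSinCos stand-in; it does not call the Darwin-only __sincos_stret / __sincosf_stret symbols.

#include <cassert>
#include <cmath>

struct SinCosF {
  float Sin; // read back first (LoadSin above)
  float Cos; // read back from SRet + sizeof(float) (LoadCos above)
};

void localSinCos(float X, SinCosF *SRet) { // hypothetical stand-in callee
  SRet->Sin = std::sin(X);
  SRet->Cos = std::cos(X);
}

int main() {
  SinCosF Buf;               // plays the role of the CreateStackObject slot
  localSinCos(0.0f, &Buf);   // call with the sret-style out-pointer
  assert(Buf.Sin == 0.0f && Buf.Cos == 1.0f);
}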
7397
7398SDValue ARMTargetLowering::LowerWindowsDIVLibCall(SDValue Op, SelectionDAG &DAG,
7399 bool Signed,
7400 SDValue &Chain) const {
7401 EVT VT = Op.getValueType();
7402 assert((VT == MVT::i32 || VT == MVT::i64) &&
7403 "unexpected type for custom lowering DIV");
7404 SDLoc dl(Op);
7405
7406 const auto &DL = DAG.getDataLayout();
7407 const auto &TLI = DAG.getTargetLoweringInfo();
7408
7409 const char *Name = nullptr;
7410 if (Signed)
7411 Name = (VT == MVT::i32) ? "__rt_sdiv" : "__rt_sdiv64";
7412 else
7413 Name = (VT == MVT::i32) ? "__rt_udiv" : "__rt_udiv64";
7414
7415 SDValue ES = DAG.getExternalSymbol(Name, TLI.getPointerTy(DL));
7416
7417 ARMTargetLowering::ArgListTy Args;
7418
7419 for (auto AI : {1, 0}) {
7420 ArgListEntry Arg;
7421 Arg.Node = Op.getOperand(AI);
7422 Arg.Ty = Arg.Node.getValueType().getTypeForEVT(*DAG.getContext());
7423 Args.push_back(Arg);
7424 }
7425
7426 CallLoweringInfo CLI(DAG);
7427 CLI.setDebugLoc(dl)
7428 .setChain(Chain)
7429 .setCallee(CallingConv::ARM_AAPCS_VFP, VT.getTypeForEVT(*DAG.getContext()),
7430 ES, std::move(Args));
7431
7432 return LowerCallTo(CLI).first;
7433}
7434
7435SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
7436 bool Signed) const {
7437 assert(Op.getValueType() == MVT::i32 &&
7438 "unexpected type for custom lowering DIV");
7439 SDLoc dl(Op);
7440
7441 SDValue DBZCHK = DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other,
7442 DAG.getEntryNode(), Op.getOperand(1));
7443
7444 return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7445}
7446
7447static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
7448 SDLoc DL(N);
7449 SDValue Op = N->getOperand(1);
7450 if (N->getValueType(0) == MVT::i32)
7451 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
7452 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7453 DAG.getConstant(0, DL, MVT::i32));
7454 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
7455 DAG.getConstant(1, DL, MVT::i32));
7456 return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
7457 DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
7458}
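The i64 case above only needs one 32-bit WIN__DBZCHK because a 64-bit value is zero exactly when the OR of its two 32-bit halves is zero. A trivial scalar check of that equivalence:

#include <cassert>
#include <cstdint>

bool isZero64ViaHalves(uint64_t V) {
  uint32_t Lo = uint32_t(V);
  uint32_t Hi = uint32_t(V >> 32);
  return (Lo | Hi) == 0;           // mirrors the OR fed to WIN__DBZCHK
}

int main() {
  assert(isZero64ViaHalves(0));
  assert(!isZero64ViaHalves(1));
  assert(!isZero64ViaHalves(uint64_t(1) << 40));
}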
7459
7460void ARMTargetLowering::ExpandDIV_Windows(
7461 SDValue Op, SelectionDAG &DAG, bool Signed,
7462 SmallVectorImpl<SDValue> &Results) const {
7463 const auto &DL = DAG.getDataLayout();
7464 const auto &TLI = DAG.getTargetLoweringInfo();
7465
7466 assert(Op.getValueType() == MVT::i64 &&
7467 "unexpected type for custom lowering DIV");
7468 SDLoc dl(Op);
7469
7470 SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
7471
7472 SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
7473
7474 SDValue Lower = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Result);
7475 SDValue Upper = DAG.getNode(ISD::SRL, dl, MVT::i64, Result,
7476 DAG.getConstant(32, dl, TLI.getPointerTy(DL)));
7477 Upper = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Upper);
7478
7479 Results.push_back(Lower);
7480 Results.push_back(Upper);
7481}
7482
7483static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) {
7484 if (isStrongerThanMonotonic(cast<AtomicSDNode>(Op)->getOrdering()))
7485 // Acquire/Release load/store is not legal for targets without a dmb or
7486 // equivalent available.
7487 return SDValue();
7488
7489 // Monotonic load/store is legal for all targets.
7490 return Op;
7491}
7492
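// READCYCLECOUNTER returns i64, but the cycle counter read via mrc is only
// 32 bits wide, so the value is paired with a zero high word.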
7493static void ReplaceREADCYCLECOUNTER(SDNode *N,
7494 SmallVectorImpl<SDValue> &Results,
7495 SelectionDAG &DAG,
7496 const ARMSubtarget *Subtarget) {
7497 SDLoc DL(N);
7498 // Under Power Management extensions, the cycle-count is:
7499 // mrc p15, #0, <Rt>, c9, c13, #0
7500 SDValue Ops[] = { N->getOperand(0), // Chain
7501 DAG.getConstant(Intrinsic::arm_mrc, DL, MVT::i32),
7502 DAG.getConstant(15, DL, MVT::i32),
7503 DAG.getConstant(0, DL, MVT::i32),
7504 DAG.getConstant(9, DL, MVT::i32),
7505 DAG.getConstant(13, DL, MVT::i32),
7506 DAG.getConstant(0, DL, MVT::i32)
7507 };
7508
7509 SDValue Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL,
7510 DAG.getVTList(MVT::i32, MVT::Other), Ops);
7511 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Cycles32,
7512 DAG.getConstant(0, DL, MVT::i32)));
7513 Results.push_back(Cycles32.getValue(1));
7514}
7515
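// Pack an i64 value into an untyped GPRPair register with a REG_SEQUENCE,
// placing the low half in gsub_0 and the high half in gsub_1.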
7516static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
7517 SDLoc dl(V.getNode());
7518 SDValue VLo = DAG.getAnyExtOrTrunc(V, dl, MVT::i32);
7519 SDValue VHi = DAG.getAnyExtOrTrunc(
7520 DAG.getNode(ISD::SRL, dl, MVT::i64, V, DAG.getConstant(32, dl, MVT::i32)),
7521 dl, MVT::i32);
7522 SDValue RegClass =
7523 DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
7524 SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
7525 SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
7526 const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
7527 return SDValue(
7528 DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
7529}
7530
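// Lower a 64-bit cmpxchg to the CMP_SWAP_64 pseudo: the expected and new
// values are packed into GPRPairs, and the result pair is split back into
// two i32 values plus the output chain.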
7531static void ReplaceCMP_SWAP_64Results(SDNode *N,
7532 SmallVectorImpl<SDValue> &Results,
7533 SelectionDAG &DAG) {
7534 assert(N->getValueType(0) == MVT::i64 &&
7535 "AtomicCmpSwap on types less than 64 should be legal");
7536 SDValue Ops[] = {N->getOperand(1),
7537 createGPRPairNode(DAG, N->getOperand(2)),
7538 createGPRPairNode(DAG, N->getOperand(3)),
7539 N->getOperand(0)};
7540 SDNode *CmpSwap = DAG.getMachineNode(
7541 ARM::CMP_SWAP_64, SDLoc(N),
7542 DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
7543
7544 MachineFunction &MF = DAG.getMachineFunction();
7545 MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
7546 MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
7547 cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
7548
7549 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
7550 SDValue(CmpSwap, 0)));
7551 Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
7552 SDValue(CmpSwap, 0)));
7553 Results.push_back(SDValue(CmpSwap, 2));
7554}
7555
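// FPOWI is custom lowered on MSVCRT targets (which provide no powi runtime
// helper) to a plain pow/powf call, after converting the integer exponent to
// floating point.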
7556static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
7557 SelectionDAG &DAG) {
7558 const auto &TLI = DAG.getTargetLoweringInfo();
7559
7560 assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
7561 "Custom lowering is MSVCRT specific!");
7562
7563 SDLoc dl(Op);
7564 SDValue Val = Op.getOperand(0);
7565 MVT Ty = Val->getSimpleValueType(0);
7566 SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
7567 SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
7568 TLI.getPointerTy(DAG.getDataLayout()));
7569
7570 TargetLowering::ArgListTy Args;
7571 TargetLowering::ArgListEntry Entry;
7572
7573 Entry.Node = Val;
7574 Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
7575 Entry.IsZExt = true;
7576 Args.push_back(Entry);
7577
7578 Entry.Node = Exponent;
7579 Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
7580 Entry.IsZExt = true;
7581 Args.push_back(Entry);
7582
7583 Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
7584
7585 // The in-chain to the call is the entry node. If we are emitting a
7586 // tail call, the chain will be mutated if the node has a non-entry input
7587 // chain.
7588 SDValue InChain = DAG.getEntryNode();
7589 SDValue TCChain = InChain;
7590
7591 const auto *F = DAG.getMachineFunction().getFunction();
7592 bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
7593 F->getReturnType() == LCRTy;
7594 if (IsTC)
7595 InChain = TCChain;
7596
7597 TargetLowering::CallLoweringInfo CLI(DAG);
7598 CLI.setDebugLoc(dl)
7599 .setChain(InChain)
7600 .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
7601 .setTailCall(IsTC);
7602 std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
7603
7604 // Return the chain (the DAG root) if it is a tail call
7605 return !CI.second.getNode() ? DAG.getRoot() : CI.first;
7606}
7607
7608SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
7609 switch (Op.getOpcode()) {
7610 default: llvm_unreachable("Don't know how to custom lower this!");
7611 case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
7612 case ISD::ConstantPool:
7613 if (Subtarget->genExecuteOnly())
7614 llvm_unreachable("execute-only should not generate constant pools")::llvm::llvm_unreachable_internal("execute-only should not generate constant pools"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp"
, 7614)
;
7615 return LowerConstantPool(Op, DAG);
7616 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
7617 case ISD::GlobalAddress:
7618 switch (Subtarget->getTargetTriple().getObjectFormat()) {
7619 default: llvm_unreachable("unknown object format")::llvm::llvm_unreachable_internal("unknown object format", "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp"
, 7619)
;
7620 case Triple::COFF:
7621 return LowerGlobalAddressWindows(Op, DAG);
7622 case Triple::ELF:
7623 return LowerGlobalAddressELF(Op, DAG);
7624 case Triple::MachO:
7625 return LowerGlobalAddressDarwin(Op, DAG);
7626 }
7627 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
7628 case ISD::SELECT: return LowerSELECT(Op, DAG);
7629 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
7630 case ISD::BR_CC: return LowerBR_CC(Op, DAG);
7631 case ISD::BR_JT: return LowerBR_JT(Op, DAG);
7632 case ISD::VASTART: return LowerVASTART(Op, DAG);
7633 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget);
7634 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget);
7635 case ISD::SINT_TO_FP:
7636 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
7637 case ISD::FP_TO_SINT:
7638 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
7639 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
7640 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
7641 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
7642 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
7643 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
7644 case ISD::EH_SJLJ_SETUP_DISPATCH: return LowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
7645 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
7646 Subtarget);
7647 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG);
7648 case ISD::SHL:
7649 case ISD::SRL:
7650 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
7651 case ISD::SREM: return LowerREM(Op.getNode(), DAG);
7652 case ISD::UREM: return LowerREM(Op.getNode(), DAG);
7653 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
7654 case ISD::SRL_PARTS:
7655 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
7656 case ISD::CTTZ:
7657 case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op.getNode(), DAG, Subtarget);
7658 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget);
7659 case ISD::SETCC: return LowerVSETCC(Op, DAG);
7660 case ISD::SETCCE: return LowerSETCCE(Op, DAG);
7661 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget);
7662 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget);
7663 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
7664 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
7665 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
7666 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
7667 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
7668 case ISD::MUL: return LowerMUL(Op, DAG);
7669 case ISD::SDIV:
7670 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7671 return LowerDIV_Windows(Op, DAG, /* Signed */ true);
7672 return LowerSDIV(Op, DAG);
7673 case ISD::UDIV:
7674 if (Subtarget->isTargetWindows() && !Op.getValueType().isVector())
7675 return LowerDIV_Windows(Op, DAG, /* Signed */ false);
7676 return LowerUDIV(Op, DAG);
7677 case ISD::ADDC:
7678 case ISD::ADDE:
7679 case ISD::SUBC:
7680 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG);
7681 case ISD::SADDO:
7682 case ISD::UADDO:
7683 case ISD::SSUBO:
7684 case ISD::USUBO:
7685 return LowerXALUO(Op, DAG);
7686 case ISD::ATOMIC_LOAD:
7687 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG);
7688 case ISD::FSINCOS: return LowerFSINCOS(Op, DAG);
7689 case ISD::SDIVREM:
7690 case ISD::UDIVREM: return LowerDivRem(Op, DAG);
7691 case ISD::DYNAMIC_STACKALLOC:
7692 if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment())
7693 return LowerDYNAMIC_STACKALLOC(Op, DAG);
7694 llvm_unreachable("Don't know how to custom lower this!")::llvm::llvm_unreachable_internal("Don't know how to custom lower this!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp"
, 7694)
;
7695 case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
7696 case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
7697 case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
7698 case ARMISD::WIN__DBZCHK: return SDValue();
7699 }
7700}
7701
7702/// ReplaceNodeResults - Replace the results of node with an illegal result
7703/// type with new values built out of custom code.
7704void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
7705 SmallVectorImpl<SDValue> &Results,
7706 SelectionDAG &DAG) const {
7707 SDValue Res;
7708 switch (N->getOpcode()) {
7709 default:
7710 llvm_unreachable("Don't know how to custom expand this!")::llvm::llvm_unreachable_internal("Don't know how to custom expand this!"
, "/tmp/buildd/llvm-toolchain-snapshot-5.0~svn298304/lib/Target/ARM/ARMISelLowering.cpp"
, 7710)
;
7711 case ISD::READ_REGISTER:
7712 ExpandREAD_REGISTER(N, Results, DAG);
7713 break;
7714 case ISD::BITCAST:
7715 Res = ExpandBITCAST(N, DAG);
7716 break;
7717 case ISD::SRL:
7718 case ISD::SRA:
7719 Res = Expand64BitShift(N, DAG, Subtarget);
7720 break;
7721 case ISD::SREM:
7722 case ISD::UREM:
7723 Res = LowerREM(N, DAG);
7724 break;
7725 case ISD::SDIVREM:
7726 case ISD::UDIVREM:
7727 Res = LowerDivRem(SDValue(N, 0), DAG);
7728 assert(Res.getNumOperands() == 2 && "DivRem needs two values");
7729 Results.push_back(Res.getValue(0));
7730 Results.push_back(Res.getValue(1));
7731 return;
7732 case ISD::READCYCLECOUNTER:
7733 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget);
7734 return;
7735 case ISD::UDIV:
7736 case ISD::SDIV:
7737 assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
7738 return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,